From 20fc9b6a66de6204bfcd2e96c63ba59cd8d24953 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Thu, 19 Jan 2023 10:34:05 +0000 Subject: [PATCH 001/203] bumped RoxygenNote in DESCRIPTION --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index c490d11..241630f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -36,7 +36,7 @@ Suggests: License: MIT + file LICENSE Encoding: UTF-8 LazyData: true -RoxygenNote: 7.1.2 +RoxygenNote: 7.2.3 Roxygen: list(markdown = TRUE) Depends: R (>= 2.10) From 091f93f6363277a63d450e76349de0728935c1ee Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Thu, 19 Jan 2023 10:35:06 +0000 Subject: [PATCH 002/203] removed tidyverse dependency from DESCRIPTION --- DESCRIPTION | 1 - 1 file changed, 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 241630f..94648c8 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -28,7 +28,6 @@ Imports: stringr, tibble, tidyr, - tidyverse, urltools Suggests: devtools, From b9313e083e7bf2c0baca6e0bc753a1c6de0ce85e Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Thu, 19 Jan 2023 16:53:22 +0000 Subject: [PATCH 003/203] added testing infrastructure and a test for get_cases to run locally (skipped by default) --- tests/testthat.R | 12 +++++ tests/testthat/test-get_cases.R | 95 +++++++++++++++++++++++++++++++++ 2 files changed, 107 insertions(+) create mode 100644 tests/testthat.R create mode 100644 tests/testthat/test-get_cases.R diff --git a/tests/testthat.R b/tests/testthat.R new file mode 100644 index 0000000..7427e8c --- /dev/null +++ b/tests/testthat.R @@ -0,0 +1,12 @@ +# This file is part of the standard setup for testthat. +# It is recommended that you do not modify it. +# +# Where should you do additional test configuration? +# Learn more about the roles of various files in: +# * https://r-pkgs.org/tests.html +# * https://testthat.r-lib.org/reference/test_package.html#special-files + +library(testthat) +library(godataR) + +test_check("godataR") diff --git a/tests/testthat/test-get_cases.R b/tests/testthat/test-get_cases.R new file mode 100644 index 0000000..631b19c --- /dev/null +++ b/tests/testthat/test-get_cases.R @@ -0,0 +1,95 @@ +test_that("get_cases works as expected", { + skip("get_cases requires API call") + + res <- get_cases( + url = url, + username = username, + password = password, + outbreak_id = outbreak_id, + method = "export", + batch_size = 50000, + wait = 2, + file.type = "json" + ) + + expect_s3_class(res, "data.frame") + expect_identical(dim(res), c(13L, 357L)) + expect_true( + all(c( + "id", "visualId", "dateOfReporting", "isDateOfReportingApproximate", + "createdAt", "createdBy", "updatedAt", "updatedBy", "deleted", + "deletedAt", "createdOn", "firstName", "middleName", "lastName", "gender", + "occupation", "dob", "classification", "wasContact", "dateBecomeCase", + "wasCase", "dateOfInfection", "dateOfOnset", "riskLevel", "riskReason", + "outcomeId", "dateOfOutcome", "documents", "type", "dateRanges", + "transferRefused", "addresses", "safeBurial", "dateOfBurial", + "isDateOfOnsetApproximate", "numberOfExposures", "numberOfContacts", + "burialLocationId", "burialLocationId Identifiers", + "burialLocationId Location geographical level", + "burialLocationId Parent location", "burialPlaceName", + "investigationStatus", "dateInvestigationCompleted", "vaccinesReceived", + "pregnancyStatus", "responsibleUserId", "age.years", "age.months" + ) %in% colnames(res)) + ) + + expect_true( + all(grepl(pattern = "^questionnaireAnswers", x = colnames(res)[50:357])) + ) + + expect_identical( + unname(sapply(res[1, ], class)), + c("character", "character", "character", "logical", "character", + "character", "character", "character", "logical", "logical", "character", + "character", "character", "character", "character", "character", + "character", "character", "logical", "character", "logical", "character", + "character", "character", "character", "character", "character", "list", + "character", "list", "logical", "list", "logical", "logical", "logical", + "integer", "integer", "logical", "list", "list", "list", "logical", + "character", "logical", "list", "character", "character", "integer", + "integer", "list", "list", "list", "list", "list", "list", "list", "list", + "list", "list", "list", "list", "list", "list", "list", "list", "list", + "list", "list", "list", "list", "list", "list", "list", "list", "list", + "list", "list", "list", "list", "list", "list", "list", "list", "list", + "logical", "logical", "logical", "logical", "logical", "logical", + "logical", "logical", "logical", "logical", "list", "list", "list", + "list", "list", "list", "logical", "logical", "logical", "logical", + "logical", "logical", "logical", "logical", "logical", "logical", + "logical", "logical", "logical", "logical", "logical", "logical", + "logical", "logical", "logical", "logical", "logical", "logical", + "logical", "logical", "logical", "logical", "logical", "logical", + "logical", "logical", "logical", "logical", "logical", "logical", + "logical", "logical", "logical", "logical", "logical", "logical", + "logical", "logical", "logical", "logical", "logical", "logical", + "logical", "logical", "logical", "logical", "logical", "logical", + "logical", "logical", "logical", "logical", "logical", "logical", + "logical", "logical", "logical", "logical", "logical", "logical", + "logical", "logical", "logical", "logical", "list", "list", "list", + "list", "list", "list", "list", "logical", "logical", "logical", + "logical", "logical", "logical", "logical", "list", "list", "list", + "list", "list", "list", "list", "list", "list", "list", "logical", + "list", "list", "list", "list", "logical", "logical", "logical", + "logical", "logical", "logical", "logical", "logical", "logical", + "logical", "logical", "logical", "logical", "logical", "logical", + "logical", "logical", "logical", "logical", "logical", "logical", + "logical", "logical", "logical", "logical", "logical", "logical", + "logical", "list", "logical", "logical", "list", "logical", "logical", + "logical", "list", "logical", "list", "logical", "list", "logical", + "list", "logical", "list", "logical", "list", "logical", "list", "list", + "list", "list", "list", "list", "list", "list", "list", "list", "list", + "list", "list", "list", "list", "logical", "list", "logical", "list", + "logical", "logical", "logical", "logical", "list", "logical", "logical", + "logical", "logical", "list", "logical", "list", "logical", "list", + "list", "logical", "list", "logical", "list", "logical", "list", + "logical", "list", "logical", "logical", "logical", "logical", "logical", + "logical", "logical", "logical", "logical", "logical", "logical", + "logical", "logical", "logical", "logical", "logical", "logical", + "logical", "logical", "logical", "logical", "logical", "logical", + "logical", "logical", "logical", "logical", "logical", "logical", + "logical", "logical", "logical", "logical", "logical", "logical", + "logical", "logical", "logical", "logical", "logical", "logical", + "logical", "logical", "logical", "logical", "logical", "logical", + "logical", "logical", "logical", "logical", "logical", "logical", + "logical", "logical", "logical", "logical", "logical", "logical", + "logical", "logical", "logical", "logical", "logical", "logical", + "logical", "logical", "logical", "list", "list", "list")) +}) From a9a59491d0e5a419282fb92e202324bf0385f87c Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Thu, 19 Jan 2023 16:53:45 +0000 Subject: [PATCH 004/203] added testthat to DESCRIPTION --- DESCRIPTION | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 94648c8..ef8a27d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -30,8 +30,8 @@ Imports: tidyr, urltools Suggests: - devtools, - testthat + devtools, + testthat (>= 3.0.0) License: MIT + file LICENSE Encoding: UTF-8 LazyData: true @@ -39,3 +39,4 @@ RoxygenNote: 7.2.3 Roxygen: list(markdown = TRUE) Depends: R (>= 2.10) +Config/testthat/edition: 3 From f72716993a68627824a13c90011486bc33ca4f04 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Thu, 19 Jan 2023 16:54:44 +0000 Subject: [PATCH 005/203] linted style of get_cases --- R/get_cases.R | 90 ++++++++++++++++++++++++++++++++------------------- 1 file changed, 57 insertions(+), 33 deletions(-) diff --git a/R/get_cases.R b/R/get_cases.R index 41b8e39..616fe73 100644 --- a/R/get_cases.R +++ b/R/get_cases.R @@ -22,17 +22,27 @@ #' be the default if you are using Go.Data version 2.38.1 #' or newer. #' -#' @param url Insert the base URL for your instance of Go.Data here. Don't forget the forward slash "/" at end! +#' @param url Insert the base URL for your instance of Go.Data here. +#' Don't forget the forward slash "/" at end! #' @param username The email address for your Go.Data login. #' @param password The password for your Go.Data login -#' @param outbreak_id The id number for the outbreak for which you want to download data. -#' @param method The method to download data. `method="export"` is the preferred and default method for Go.Data version 2.38.1 or later. See Details. -#' @param batch_size If `method="batches"`, then `batch_size` specifies the number of records to retrieve in each iteration. -#' @param wait If `method="export"`, then `wait` is the number of seconds to wait in between iterations of checking the status of the export. -#' @param file.type If `method="export"`, then `file.type` determines Whether the resulting data frame should contain nested fields (`file.type="json"`, the default) or an entirely flat data structure (`file.type="csv"`) +#' @param outbreak_id The id number for the outbreak for which you want to +#' download data. +#' @param method The method to download data. `method="export"` is the +#' preferred and default method for Go.Data version 2.38.1 or later. +#' See Details. +#' @param batch_size If `method = "batches"`, then `batch_size` specifies the +#' number of records to retrieve in each iteration. +#' @param wait If `method = "export"`, then `wait` is the number of seconds to +#' wait in between iterations of checking the status of the export. +#' @param file.type If `method = "export"`, then `file.type` determines Whether +#' the resulting data frame should contain nested fields (`file.type = "json"`, +#' the default) or an entirely flat data structure (`file.type = "csv"`) #' #' @return -#' Returns a data frame. Some fields, such as addresses, hospitalization history, and questionnaire fields may require further unnesting. See \code{\link[tidyr]{nest}} for assitance with unnesting. +#' Returns a data frame. Some fields, such as addresses, hospitalization +#' history, and questionnaire fields may require further unnesting. See +#' `\link[tidyr]{nest}` for assitance with unnesting. #' @export #' #' @examples @@ -56,22 +66,32 @@ #' @importFrom purrr pluck -get_cases <- function(url=url, - username=username, - password=password, - outbreak_id=outbreak_id, - method=c("export","batch"), - batch_size=50000, - wait=2, - file.type=c("json","csv")) { +get_cases <- function(url = url, + username = username, + password = password, + outbreak_id = outbreak_id, + method = c("export", "batch"), + batch_size = 50000, + wait = 2, + file.type = c("json", "csv")) { #Check that outbreak_id is active - if (outbreak_id != get_active_outbreak(url=url, username=username, password=password)) { - set_active_outbreak(url=url, username=username, password=password, outbreak_id=outbreak_id) + active_outbreak_id <- get_active_outbreak( + url = url, + username = username, + password = password + ) + if (outbreak_id != active_outbreak_id) { + set_active_outbreak( + url = url, + username = username, + password = password, + outbreak_id = outbreak_id + ) } #Set default method based on current version of Go.Data - version.check <- check_godata_version(url=url) + version.check <- check_godata_version(url = url) if (!version.check) { method <- "batches" #Older version of Go.Data can only use the batch method } else if (missing(method)) { @@ -81,14 +101,16 @@ get_cases <- function(url=url, if (method == "batches") { - api_call_n <- paste0(url, "api/outbreaks/",outbreak_id,"/cases/count") - api_call_get <- paste0(url, "api/outbreaks/",outbreak_id,"/cases") - df <- batch_downloader(url=url, - username=username, - password=password, - api_call_n=api_call_n, - api_call_get=api_call_get, - batch_size=batch_size) + api_call_n <- paste0(url, "api/outbreaks/", outbreak_id, "/cases/count") + api_call_get <- paste0(url, "api/outbreaks/", outbreak_id, "/cases") + df <- batch_downloader( + url = url, + username = username, + password = password, + api_call_n = api_call_n, + api_call_get = api_call_get, + batch_size = batch_size + ) } else if (method == "export") { @@ -96,13 +118,15 @@ get_cases <- function(url=url, if (missing(file.type)) file.type <- "json" #Submit an export request to the system - api_call_request <- paste0(url,"api/outbreaks/",outbreak_id,"/cases/export") - df <- export_downloader(url=url, - username=username, - password=password, - api_call_request=api_call_request, - file.type = file.type, - wait = wait) + api_call_request <- paste0(url, "api/outbreaks/", outbreak_id, "/cases/export") + df <- export_downloader( + url = url, + username = username, + password = password, + api_call_request = api_call_request, + file.type = file.type, + wait = wait + ) } From 976d73fd8a4017036263fc0b0b5c28b1ed312b37 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Thu, 19 Jan 2023 17:04:54 +0000 Subject: [PATCH 006/203] changed file.type argument and variables to snake case --- R/get_cases.R | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/R/get_cases.R b/R/get_cases.R index 616fe73..4688289 100644 --- a/R/get_cases.R +++ b/R/get_cases.R @@ -10,7 +10,7 @@ #' Go.Data. This method relies on the GET outbreak/{id}/cases #' API endpoint. Records are then retrieved in batches #' based on `batch_size` and appended together into -#' a final dataset. `method="batches"` will be the default and +#' a final dataset. `method = "batches"` will be the default and #' only available method for Go.Data version 2.38.0 or older. #' #' `method="export"` will only work on Go.Data versions @@ -18,7 +18,7 @@ #' outbreak/{id}/cases/export API endpoint. An export #' request is submitted to the server, and then when the #' export is ready, it will be downloaded. Due to better -#' performance and more options, `method="export"` will +#' performance and more options, `method = "export"` will #' be the default if you are using Go.Data version 2.38.1 #' or newer. #' @@ -28,16 +28,16 @@ #' @param password The password for your Go.Data login #' @param outbreak_id The id number for the outbreak for which you want to #' download data. -#' @param method The method to download data. `method="export"` is the +#' @param method The method to download data. `method = "export"` is the #' preferred and default method for Go.Data version 2.38.1 or later. #' See Details. #' @param batch_size If `method = "batches"`, then `batch_size` specifies the #' number of records to retrieve in each iteration. #' @param wait If `method = "export"`, then `wait` is the number of seconds to #' wait in between iterations of checking the status of the export. -#' @param file.type If `method = "export"`, then `file.type` determines Whether -#' the resulting data frame should contain nested fields (`file.type = "json"`, -#' the default) or an entirely flat data structure (`file.type = "csv"`) +#' @param file_type If `method = "export"`, then `file_type` determines Whether +#' the resulting data frame should contain nested fields (`file_type = "json"`, +#' the default) or an entirely flat data structure (`file_type = "csv"`) #' #' @return #' Returns a data frame. Some fields, such as addresses, hospitalization @@ -73,7 +73,7 @@ get_cases <- function(url = url, method = c("export", "batch"), batch_size = 50000, wait = 2, - file.type = c("json", "csv")) { + file_type = c("json", "csv")) { #Check that outbreak_id is active active_outbreak_id <- get_active_outbreak( @@ -91,8 +91,8 @@ get_cases <- function(url = url, } #Set default method based on current version of Go.Data - version.check <- check_godata_version(url = url) - if (!version.check) { + version_check <- check_godata_version(url = url) + if (!version_check) { method <- "batches" #Older version of Go.Data can only use the batch method } else if (missing(method)) { method <- "export" # For new versions of Go.Data, default to export method @@ -114,22 +114,21 @@ get_cases <- function(url = url, } else if (method == "export") { - #Default value of file.type is "json" - if (missing(file.type)) file.type <- "json" + # Default value of file_type is "json" + if (missing(file_type)) file_type <- "json" #Submit an export request to the system - api_call_request <- paste0(url, "api/outbreaks/", outbreak_id, "/cases/export") + api_call_request <- paste0( + url, "api/outbreaks/", outbreak_id, "/cases/export" + ) df <- export_downloader( url = url, username = username, password = password, api_call_request = api_call_request, - file.type = file.type, + file_type = file_type, wait = wait ) - - } - return(df) } From 56417f463ed067c4cbece30edb12658fcfa965f0 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Thu, 19 Jan 2023 17:05:18 +0000 Subject: [PATCH 007/203] updated get_cases documentation --- man/get_cases.Rd | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/man/get_cases.Rd b/man/get_cases.Rd index ca96123..6009004 100644 --- a/man/get_cases.Rd +++ b/man/get_cases.Rd @@ -12,28 +12,38 @@ get_cases( method = c("export", "batch"), batch_size = 50000, wait = 2, - file.type = c("json", "csv") + file_type = c("json", "csv") ) } \arguments{ -\item{url}{Insert the base URL for your instance of Go.Data here. Don't forget the forward slash "/" at end!} +\item{url}{Insert the base URL for your instance of Go.Data here. +Don't forget the forward slash "/" at end!} \item{username}{The email address for your Go.Data login.} \item{password}{The password for your Go.Data login} -\item{outbreak_id}{The id number for the outbreak for which you want to download data.} +\item{outbreak_id}{The id number for the outbreak for which you want to +download data.} -\item{method}{The method to download data. \code{method="export"} is the preferred and default method for Go.Data version 2.38.1 or later. See Details.} +\item{method}{The method to download data. \code{method = "export"} is the +preferred and default method for Go.Data version 2.38.1 or later. +See Details.} -\item{batch_size}{If \code{method="batches"}, then \code{batch_size} specifies the number of records to retrieve in each iteration.} +\item{batch_size}{If \code{method = "batches"}, then \code{batch_size} specifies the +number of records to retrieve in each iteration.} -\item{wait}{If \code{method="export"}, then \code{wait} is the number of seconds to wait in between iterations of checking the status of the export.} +\item{wait}{If \code{method = "export"}, then \code{wait} is the number of seconds to +wait in between iterations of checking the status of the export.} -\item{file.type}{If \code{method="export"}, then \code{file.type} determines Whether the resulting data frame should contain nested fields (\code{file.type="json"}, the default) or an entirely flat data structure (\code{file.type="csv"})} +\item{file_type}{If \code{method = "export"}, then \code{file_type} determines Whether +the resulting data frame should contain nested fields (\code{file_type = "json"}, +the default) or an entirely flat data structure (\code{file_type = "csv"})} } \value{ -Returns a data frame. Some fields, such as addresses, hospitalization history, and questionnaire fields may require further unnesting. See \code{\link[tidyr]{nest}} for assitance with unnesting. +Returns a data frame. Some fields, such as addresses, hospitalization +history, and questionnaire fields may require further unnesting. See +\verb{\link[tidyr]\{nest\}} for assitance with unnesting. } \description{ A function to retrieve the case data for a @@ -47,7 +57,7 @@ are two methods for downloading the data: Go.Data. This method relies on the GET outbreak/{id}/cases API endpoint. Records are then retrieved in batches based on \code{batch_size} and appended together into -a final dataset. \code{method="batches"} will be the default and +a final dataset. \code{method = "batches"} will be the default and only available method for Go.Data version 2.38.0 or older. \code{method="export"} will only work on Go.Data versions @@ -55,7 +65,7 @@ only available method for Go.Data version 2.38.0 or older. outbreak/{id}/cases/export API endpoint. An export request is submitted to the server, and then when the export is ready, it will be downloaded. Due to better -performance and more options, \code{method="export"} will +performance and more options, \code{method = "export"} will be the default if you are using Go.Data version 2.38.1 or newer. } From 70bf9823dddd4b6b5b3be36d7541fde12ffae694 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Thu, 19 Jan 2023 17:13:09 +0000 Subject: [PATCH 008/203] remove imports that are not used in get_cases --- R/get_cases.R | 9 --------- 1 file changed, 9 deletions(-) diff --git a/R/get_cases.R b/R/get_cases.R index 4688289..c8b69a3 100644 --- a/R/get_cases.R +++ b/R/get_cases.R @@ -57,15 +57,6 @@ #' password=password, #' outbreak_id=outbreak_id) #' } -#' @importFrom magrittr %>% -#' @import dplyr -#' @import tidyr -#' @import httr -#' @import tibble -#' @importFrom jsonlite fromJSON -#' @importFrom purrr pluck - - get_cases <- function(url = url, username = username, password = password, From 987cd451901ed8006858018687269283b20ef5e4 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Fri, 20 Jan 2023 08:21:38 +0000 Subject: [PATCH 009/203] changed get_cases output from data frame to tibble --- R/get_cases.R | 12 +++++++----- man/get_cases.Rd | 10 ++++++---- tests/testthat/test-get_cases.R | 3 ++- 3 files changed, 15 insertions(+), 10 deletions(-) diff --git a/R/get_cases.R b/R/get_cases.R index c8b69a3..d04f0c6 100644 --- a/R/get_cases.R +++ b/R/get_cases.R @@ -52,10 +52,12 @@ #' password <- "mypassword" #' outbreak_id <- "3b5554d7-2c19-41d0-b9af-475ad25a382b" #' -#' cases <- get_cases(url=url, -#' username=username, -#' password=password, -#' outbreak_id=outbreak_id) +#' cases <- get_cases( +#' url = url, +#' username = username, +#' password = password, +#' outbreak_id = outbreak_id +#' ) #' } get_cases <- function(url = url, username = username, @@ -121,5 +123,5 @@ get_cases <- function(url = url, wait = wait ) } - return(df) + return(tibble::as_tibble(df)) } diff --git a/man/get_cases.Rd b/man/get_cases.Rd index 6009004..aa28bbd 100644 --- a/man/get_cases.Rd +++ b/man/get_cases.Rd @@ -76,9 +76,11 @@ username <- "myemail@email.com" password <- "mypassword" outbreak_id <- "3b5554d7-2c19-41d0-b9af-475ad25a382b" -cases <- get_cases(url=url, - username=username, - password=password, - outbreak_id=outbreak_id) +cases <- get_cases( + url = url, + username = username, + password = password, + outbreak_id = outbreak_id +) } } diff --git a/tests/testthat/test-get_cases.R b/tests/testthat/test-get_cases.R index 631b19c..504cb8d 100644 --- a/tests/testthat/test-get_cases.R +++ b/tests/testthat/test-get_cases.R @@ -9,9 +9,10 @@ test_that("get_cases works as expected", { method = "export", batch_size = 50000, wait = 2, - file.type = "json" + file_type = "json" ) + expect_s3_class(res, "tbl_df") expect_s3_class(res, "data.frame") expect_identical(dim(res), c(13L, 357L)) expect_true( From f32c6a31e71525dc38bf26e4c2ea9dbb95391198 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Fri, 20 Jan 2023 08:56:41 +0000 Subject: [PATCH 010/203] linted get_active_outbreak --- R/get_active_outbreak.R | 39 ++++++++++++++++++++++++++------------- 1 file changed, 26 insertions(+), 13 deletions(-) diff --git a/R/get_active_outbreak.R b/R/get_active_outbreak.R index a87b6a1..1b01f79 100644 --- a/R/get_active_outbreak.R +++ b/R/get_active_outbreak.R @@ -7,7 +7,8 @@ #' housekeeping function used in many of the #' other `godataR` functions. #' -#' @param url Insert the base URL for your instance of Go.Data here. Don't forget the forward slash "/" at end! +#' @param url Insert the base URL for your instance of Go.Data here. Don't +#' forget the forward slash "/" at end! #' @param username The email address for your Go.Data login. #' @param password The password for your Go.Data login. #' @@ -20,9 +21,11 @@ #' username <- "myemail@email.com" #' password <- "mypassword" #' -#' active_outbreak_id <- get_active_outbreak(url=url, -#' username=username, -#' password=password) +#' active_outbreak_id <- get_active_outbreak( +#' url = url, +#' username = username, +#' password = password +#' ) #' } #' @importFrom magrittr %>% #' @import dplyr @@ -30,17 +33,27 @@ #' @import httr #' @importFrom jsonlite fromJSON -get_active_outbreak <- function(url=url, - username=username, - password=password) { +get_active_outbreak <- function(url = url, + username = username, + password = password) { - users <- GET(paste0(url,"api/users", - "?access_token=",get_access_token(url=url, username=username, password=password))) %>% - content(as="text") %>% - fromJSON(flatten=TRUE) + users <- GET( + paste0( + url, + "api/users", + "?access_token=", + get_access_token( + url = url, + username = username, + password = password + ) + ) + ) %>% + content(as = "text") %>% + fromJSON(flatten = TRUE) - active.outbreak <- users$activeOutbreakId[users$email==username] + active_outbreak <- users$activeOutbreakId[users$email == username] - return(active.outbreak) + return(active_outbreak) } From fc26e4c751519c5ed80d71b32ddee144b7e72478 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Fri, 20 Jan 2023 08:57:19 +0000 Subject: [PATCH 011/203] added tests for get_active_outbreak (skipped by default) --- tests/testthat/test-get_active_outbreak.R | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 tests/testthat/test-get_active_outbreak.R diff --git a/tests/testthat/test-get_active_outbreak.R b/tests/testthat/test-get_active_outbreak.R new file mode 100644 index 0000000..d000c2f --- /dev/null +++ b/tests/testthat/test-get_active_outbreak.R @@ -0,0 +1,17 @@ +test_that("get_active_outbreak works as expected", { + skip("get_active_outbreak requires API call") + + res <- get_active_outbreak( + url = url, + username = username, + password = password + ) + + expect_type(res, "character") + # character string can contain alphanumeric characters + expect_true(grepl(pattern = "[:alphanum:]", x = res)) + # character string can contain punctuation marks + expect_true(grepl(pattern = "[:punct:]", x = res)) + # character string cannot contain spaces + expect_false(grepl(pattern = "\\s", x = res)) +}) From ceb3a7f1e2761242067185a9ea3d79e199da84d1 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Fri, 20 Jan 2023 08:59:44 +0000 Subject: [PATCH 012/203] removed pipes from get_active_outbreak --- R/get_active_outbreak.R | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/R/get_active_outbreak.R b/R/get_active_outbreak.R index 1b01f79..05db1bb 100644 --- a/R/get_active_outbreak.R +++ b/R/get_active_outbreak.R @@ -32,12 +32,12 @@ #' @import tidyr #' @import httr #' @importFrom jsonlite fromJSON - get_active_outbreak <- function(url = url, username = username, password = password) { - users <- GET( + + godata_url <- GET( paste0( url, "api/users", @@ -48,9 +48,11 @@ get_active_outbreak <- function(url = url, password = password ) ) - ) %>% - content(as = "text") %>% - fromJSON(flatten = TRUE) + ) + + url_content <- content(godata_url, as = "text") + + users <- fromJSON(url_content, flatten = TRUE) active_outbreak <- users$activeOutbreakId[users$email == username] From 6f3066565058102f231f50dc07881345d59a3dbf Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Fri, 20 Jan 2023 09:02:15 +0000 Subject: [PATCH 013/203] used explicit namespace instead of import in get_active_outbreak --- R/get_active_outbreak.R | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/R/get_active_outbreak.R b/R/get_active_outbreak.R index 05db1bb..2df6c04 100644 --- a/R/get_active_outbreak.R +++ b/R/get_active_outbreak.R @@ -27,17 +27,11 @@ #' password = password #' ) #' } -#' @importFrom magrittr %>% -#' @import dplyr -#' @import tidyr -#' @import httr -#' @importFrom jsonlite fromJSON get_active_outbreak <- function(url = url, username = username, password = password) { - - godata_url <- GET( + godata_url <- httr::GET( paste0( url, "api/users", @@ -50,12 +44,11 @@ get_active_outbreak <- function(url = url, ) ) - url_content <- content(godata_url, as = "text") + url_content <- httr::content(godata_url, as = "text") - users <- fromJSON(url_content, flatten = TRUE) + users <- jsonlite::fromJSON(url_content, flatten = TRUE) active_outbreak <- users$activeOutbreakId[users$email == username] return(active_outbreak) - } From 5cf6e5d7b6cf29fd6a4162f736d5a53c36824080 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Fri, 20 Jan 2023 09:07:36 +0000 Subject: [PATCH 014/203] added comments to get_active_outbreak --- R/get_active_outbreak.R | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/R/get_active_outbreak.R b/R/get_active_outbreak.R index 2df6c04..b28509f 100644 --- a/R/get_active_outbreak.R +++ b/R/get_active_outbreak.R @@ -31,6 +31,7 @@ get_active_outbreak <- function(url = url, username = username, password = password) { + # get request to go.data godata_url <- httr::GET( paste0( url, @@ -44,10 +45,13 @@ get_active_outbreak <- function(url = url, ) ) + # unpack request as character string url_content <- httr::content(godata_url, as = "text") + # converts JSON string into data frame users <- jsonlite::fromJSON(url_content, flatten = TRUE) + # subset to active user active_outbreak <- users$activeOutbreakId[users$email == username] return(active_outbreak) From f0606a1bf61c3d50b341e7bf446a1535d973f574 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Fri, 20 Jan 2023 09:10:00 +0000 Subject: [PATCH 015/203] updated get_active_outbreak documentation --- man/get_active_outbreak.Rd | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/man/get_active_outbreak.Rd b/man/get_active_outbreak.Rd index 1d93daa..185500c 100644 --- a/man/get_active_outbreak.Rd +++ b/man/get_active_outbreak.Rd @@ -7,7 +7,8 @@ get_active_outbreak(url = url, username = username, password = password) } \arguments{ -\item{url}{Insert the base URL for your instance of Go.Data here. Don't forget the forward slash "/" at end!} +\item{url}{Insert the base URL for your instance of Go.Data here. Don't +forget the forward slash "/" at end!} \item{username}{The email address for your Go.Data login.} @@ -30,8 +31,10 @@ url <- "https://MyGoDataServer.com/" username <- "myemail@email.com" password <- "mypassword" -active_outbreak_id <- get_active_outbreak(url=url, - username=username, - password=password) +active_outbreak_id <- get_active_outbreak( + url = url, + username = username, + password = password +) } } From 00cca1b5bd308beffa96ec40965c532aa6e345e3 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Fri, 20 Jan 2023 09:23:57 +0000 Subject: [PATCH 016/203] linted check_godata_version --- R/check_godata_version.R | 35 ++++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/R/check_godata_version.R b/R/check_godata_version.R index 8557dae..1520530 100644 --- a/R/check_godata_version.R +++ b/R/check_godata_version.R @@ -5,7 +5,8 @@ #' This is a housekeeping function used in #' many of the other `godataR` functions. #' -#' @param url Insert the base URL for your instance of Go.Data here. Don't forget the forward slash "/" at end! +#' @param url Insert the base URL for your instance of Go.Data here. Don't +#' forget the forward slash "/" at end! #' #' @return Boolean, where `TRUE` indicates version 2.38.1 or later. #' @examples @@ -15,33 +16,33 @@ #' } #' @importFrom magrittr %>% #' @importFrom stringr str_split -check_godata_version <- function(url=url) { +check_godata_version <- function(url = url) { # Get Current Version of Go.Data - gd.version <- get_godata_version(url=url) + gd_version <- get_godata_version(url = url) # Convert string to vector of 3 numbers - gd.version <- str_split(gd.version, "[.]") %>% + gd_version <- str_split(gd_version, "[.]") %>% unlist() %>% as.numeric() # Check if 2.38.1 or later # Should be TRUE if it is version 2.38.1 or later & # FALSE if version 2.38.0 or earlier - if (gd.version[1] < 2) { - after.2.38.1 <- FALSE - } else if (gd.version[1]==2 & gd.version[2] < 38) { - after.2.38.1 <- FALSE - } else if (gd.version[1]==2 & gd.version[2]==38 & gd.version[3]<1) { - after.2.38.1 <- FALSE - } else if (gd.version[1]==2 & gd.version[2]==38 & gd.version[3]>=1) { - after.2.38.1 <- TRUE - } else if (gd.version[1]==2 & gd.version[2]>38) { - after.2.38.1 <- TRUE - } else if (gd.version[1]>2) { - after.2.38.1 <- TRUE + if (gd_version[1] < 2) { + after_2_38_1 <- FALSE + } else if (gd_version[1] == 2 && gd_version[2] < 38) { + after_2_38_1 <- FALSE + } else if (gd_version[1] == 2 && gd_version[2] == 38 && gd_version[3] < 1) { + after_2_38_1 <- FALSE + } else if (gd_version[1] == 2 && gd_version[2] == 38 && gd_version[3] >= 1) { + after_2_38_1 <- TRUE + } else if (gd_version[1] == 2 && gd_version[2] > 38) { + after_2_38_1 <- TRUE + } else if (gd_version[1] > 2) { + after_2_38_1 <- TRUE } - return(after.2.38.1) + return(after_2_38_1) } From 8ad29dc5861263616ffae5d8b8cdd7a6dc210b1b Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Fri, 20 Jan 2023 09:24:12 +0000 Subject: [PATCH 017/203] updated documentation for check_godata_version --- man/check_godata_version.Rd | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/man/check_godata_version.Rd b/man/check_godata_version.Rd index 42bbb5c..0a1b413 100644 --- a/man/check_godata_version.Rd +++ b/man/check_godata_version.Rd @@ -7,7 +7,8 @@ check_godata_version(url = url) } \arguments{ -\item{url}{Insert the base URL for your instance of Go.Data here. Don't forget the forward slash "/" at end!} +\item{url}{Insert the base URL for your instance of Go.Data here. Don't +forget the forward slash "/" at end!} } \value{ Boolean, where \code{TRUE} indicates version 2.38.1 or later. From b6dc35d88728b3e08ff7bdbafec8cbc5d4c4656b Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Fri, 20 Jan 2023 09:24:35 +0000 Subject: [PATCH 018/203] added test for check_godata_version (skipped by default) --- tests/testthat/test-check_godata_version.R | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 tests/testthat/test-check_godata_version.R diff --git a/tests/testthat/test-check_godata_version.R b/tests/testthat/test-check_godata_version.R new file mode 100644 index 0000000..823650a --- /dev/null +++ b/tests/testthat/test-check_godata_version.R @@ -0,0 +1,5 @@ +test_that("check_godata_version works as expected", { + skip("check_godata_version requires API call") + + expect_true(check_godata_version(url = url)) +}) From 2833b13d7293237ff2ea95463200b03f925e6d6f Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Fri, 20 Jan 2023 09:26:43 +0000 Subject: [PATCH 019/203] removed pipes from check_godata_version --- R/check_godata_version.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/check_godata_version.R b/R/check_godata_version.R index 1520530..d6407ba 100644 --- a/R/check_godata_version.R +++ b/R/check_godata_version.R @@ -22,9 +22,9 @@ check_godata_version <- function(url = url) { gd_version <- get_godata_version(url = url) # Convert string to vector of 3 numbers - gd_version <- str_split(gd_version, "[.]") %>% - unlist() %>% - as.numeric() + gd_version <- str_split(gd_version, "[.]") + + gd_version <- as.numeric(unlist(gd_version)) # Check if 2.38.1 or later # Should be TRUE if it is version 2.38.1 or later & From d1833a51a9a1b47abfd7169926484c4647e72393 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Fri, 20 Jan 2023 09:30:44 +0000 Subject: [PATCH 020/203] used explicit namespace instead of import for check_godata_version --- NAMESPACE | 1 - R/check_godata_version.R | 6 ++---- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 500a962..6c64574 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -42,5 +42,4 @@ import(urltools) importFrom(jsonlite,fromJSON) importFrom(magrittr,"%>%") importFrom(purrr,pluck) -importFrom(stringr,str_split) importFrom(utils,read.csv) diff --git a/R/check_godata_version.R b/R/check_godata_version.R index d6407ba..00d8d15 100644 --- a/R/check_godata_version.R +++ b/R/check_godata_version.R @@ -12,17 +12,15 @@ #' @examples #' \dontrun{ #' url <- "https://MyGoDataServer.com/" -#' check_godata_version(url=url) +#' check_godata_version(url = url) #' } -#' @importFrom magrittr %>% -#' @importFrom stringr str_split check_godata_version <- function(url = url) { # Get Current Version of Go.Data gd_version <- get_godata_version(url = url) # Convert string to vector of 3 numbers - gd_version <- str_split(gd_version, "[.]") + gd_version <- stringr::str_split(gd_version, "[.]") gd_version <- as.numeric(unlist(gd_version)) From fb6581ff4d9395e21542e811b5da933dfc679310 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Fri, 20 Jan 2023 09:31:07 +0000 Subject: [PATCH 021/203] updated documentation for check_godata_version --- man/check_godata_version.Rd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/man/check_godata_version.Rd b/man/check_godata_version.Rd index 0a1b413..46c6dff 100644 --- a/man/check_godata_version.Rd +++ b/man/check_godata_version.Rd @@ -22,6 +22,6 @@ many of the other \code{godataR} functions. \examples{ \dontrun{ url <- "https://MyGoDataServer.com/" -check_godata_version(url=url) +check_godata_version(url = url) } } From f73a5244a54dfa79bff4c9db2a7109cfde0dea50 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Fri, 20 Jan 2023 10:09:48 +0000 Subject: [PATCH 022/203] reduce cyclomatic complexity of check_godata_version --- R/check_godata_version.R | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/R/check_godata_version.R b/R/check_godata_version.R index 00d8d15..8cdd754 100644 --- a/R/check_godata_version.R +++ b/R/check_godata_version.R @@ -28,19 +28,12 @@ check_godata_version <- function(url = url) { # Should be TRUE if it is version 2.38.1 or later & # FALSE if version 2.38.0 or earlier if (gd_version[1] < 2) { - after_2_38_1 <- FALSE + return(FALSE) } else if (gd_version[1] == 2 && gd_version[2] < 38) { - after_2_38_1 <- FALSE - } else if (gd_version[1] == 2 && gd_version[2] == 38 && gd_version[3] < 1) { - after_2_38_1 <- FALSE - } else if (gd_version[1] == 2 && gd_version[2] == 38 && gd_version[3] >= 1) { - after_2_38_1 <- TRUE - } else if (gd_version[1] == 2 && gd_version[2] > 38) { - after_2_38_1 <- TRUE - } else if (gd_version[1] > 2) { - after_2_38_1 <- TRUE + return(FALSE) + } else if (gd_version[1] == 2 && gd_version[2] == 38 && gd_version[3] == 0) { + return(FALSE) + } else { + return(TRUE) } - - return(after_2_38_1) - } From f6d894f0b7f19c4adfa570de77be22b92f527a7a Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Fri, 20 Jan 2023 10:29:02 +0000 Subject: [PATCH 023/203] linted get_godata_version --- R/get_godata_version.R | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/R/get_godata_version.R b/R/get_godata_version.R index 0ae96e0..260c7ee 100644 --- a/R/get_godata_version.R +++ b/R/get_godata_version.R @@ -5,7 +5,8 @@ #' housekeeping function used in many of the #' other `godataR` functions. #' -#' @param url Insert the base URL for your instance of Go.Data here. Don't forget the forward slash "/" at end! +#' @param url Insert the base URL for your instance of Go.Data here. Don't +#' forget the forward slash "/" at end! #' #' @return string #' @export @@ -13,22 +14,22 @@ #' @examples #' \dontrun{ #' url <- "https://MyGoDataServer.com/" -#' get_godata_version(url=url) +#' get_godata_version(url = url) #' } #' @import httr #' @importFrom jsonlite fromJSON #' @importFrom magrittr %>% -get_godata_version <- function(url=url) { +get_godata_version <- function(url = url) { - version.request <- GET(paste0(url,"api/system-settings/version")) + version_request <- GET(paste0(url, "api/system-settings/version")) - if (version.request$status_code==200) { - version <- content(version.request, as="text") %>% - fromJSON(flatten=TRUE) + if (version_request$status_code == 200) { + version <- content(version_request, as = "text") %>% + fromJSON(flatten = TRUE) return(version$version) } else { - stop(paste0("Error ",version.request$status_code)) + stop(paste0("Error ", version_request$status_code)) } } From e1f79720e8388d85e0032333ff2c1f6765462c41 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Fri, 20 Jan 2023 10:29:29 +0000 Subject: [PATCH 024/203] added test for get_godata_version (skipped by default) --- tests/testthat/test-get_godata_version.R | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 tests/testthat/test-get_godata_version.R diff --git a/tests/testthat/test-get_godata_version.R b/tests/testthat/test-get_godata_version.R new file mode 100644 index 0000000..6a8e862 --- /dev/null +++ b/tests/testthat/test-get_godata_version.R @@ -0,0 +1,13 @@ +test_that("get_godata_versions works as expected", { + skip("get_godata_version requires API call") + + res <- get_godata_version(url = url) + + expect_type(res, "character") + # character string can contain digits + expect_true(grepl(pattern = "\\d",x = res)) + # character string can contain full stops + expect_true(grepl(pattern = ".", x = res)) + # character string cannot contain alphabetic characters + expect_false(grepl(pattern = "[:alpha:]", x = res)) +}) From 95511f175cabb2caab6a45c90fc1841488a2fe2a Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Fri, 20 Jan 2023 10:29:48 +0000 Subject: [PATCH 025/203] updated documentation for get_godata_version --- man/get_godata_version.Rd | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/man/get_godata_version.Rd b/man/get_godata_version.Rd index 809e38f..db8cd2a 100644 --- a/man/get_godata_version.Rd +++ b/man/get_godata_version.Rd @@ -7,7 +7,8 @@ get_godata_version(url = url) } \arguments{ -\item{url}{Insert the base URL for your instance of Go.Data here. Don't forget the forward slash "/" at end!} +\item{url}{Insert the base URL for your instance of Go.Data here. Don't +forget the forward slash "/" at end!} } \value{ string @@ -21,6 +22,6 @@ other \code{godataR} functions. \examples{ \dontrun{ url <- "https://MyGoDataServer.com/" -get_godata_version(url=url) +get_godata_version(url = url) } } From fefc74e642f89dbf580de26d088cc7038a09af2e Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Fri, 20 Jan 2023 10:32:47 +0000 Subject: [PATCH 026/203] removed pipe from get_godata_version --- R/get_godata_version.R | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/R/get_godata_version.R b/R/get_godata_version.R index 260c7ee..d4c8c7f 100644 --- a/R/get_godata_version.R +++ b/R/get_godata_version.R @@ -19,17 +19,15 @@ #' @import httr #' @importFrom jsonlite fromJSON #' @importFrom magrittr %>% - get_godata_version <- function(url = url) { version_request <- GET(paste0(url, "api/system-settings/version")) if (version_request$status_code == 200) { - version <- content(version_request, as = "text") %>% - fromJSON(flatten = TRUE) + version <- content(version_request, as = "text") + version <- fromJSON(version, flatten = TRUE) return(version$version) } else { stop(paste0("Error ", version_request$status_code)) } - } From 6d5a167843c4756ff31079594820b8b5422d9d17 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Fri, 20 Jan 2023 10:34:18 +0000 Subject: [PATCH 027/203] used explicit namespace instead of import in get_godata_version --- R/get_godata_version.R | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/R/get_godata_version.R b/R/get_godata_version.R index d4c8c7f..375cabc 100644 --- a/R/get_godata_version.R +++ b/R/get_godata_version.R @@ -16,16 +16,13 @@ #' url <- "https://MyGoDataServer.com/" #' get_godata_version(url = url) #' } -#' @import httr -#' @importFrom jsonlite fromJSON -#' @importFrom magrittr %>% get_godata_version <- function(url = url) { - version_request <- GET(paste0(url, "api/system-settings/version")) + version_request <- httr::GET(paste0(url, "api/system-settings/version")) if (version_request$status_code == 200) { - version <- content(version_request, as = "text") - version <- fromJSON(version, flatten = TRUE) + version <- httr::content(version_request, as = "text") + version <- jsonlite::fromJSON(version, flatten = TRUE) return(version$version) } else { stop(paste0("Error ", version_request$status_code)) From 25edbda79d4b3f638c846581b1771307f1eed845 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Fri, 20 Jan 2023 11:36:59 +0000 Subject: [PATCH 028/203] linted batch_downloader --- R/batch_downloader.R | 71 ++++++++++++++++++++++++++++++++------------ 1 file changed, 52 insertions(+), 19 deletions(-) diff --git a/R/batch_downloader.R b/R/batch_downloader.R index 86e4af6..2051e66 100644 --- a/R/batch_downloader.R +++ b/R/batch_downloader.R @@ -2,15 +2,19 @@ #' #' A housekeeping function to do batch downloads. #' -#' @param url Insert the base URL for your instance of Go.Data here. Don't forget the forward slash "/" at end! +#' @param url Insert the base URL for your instance of Go.Data here. Don't +#' forget the forward slash "/" at end! #' @param username The email address for your Go.Data login. #' @param password The password for your Go.Data login #' @param api_call_n The API url to get the number of records. #' @param api_call_get The API url to GET the records. -#' @param batch_size Specifies the number of records to retrieve in each iteration. +#' @param batch_size Specifies the number of records to retrieve in each +#' iteration. #' #' @return -#' Returns a data frame. Some fields, such as addresses, hospitalization history, and questionnaire fields may require further unnesting. See \code{\link[tidyr]{nest}} for assitance with unnesting. +#' Returns a data frame. Some fields, such as addresses, hospitalization +#' history, and questionnaire fields may require further unnesting. See +#' `\link[tidyr]{nest}` for assitance with unnesting. #' #' @examples #' \dontrun{ @@ -41,44 +45,73 @@ batch_downloader <- function(url = url, batch_size = batch_size) { #get total number of records - df_n <- GET(paste0(api_call_n), - add_headers(Authorization = paste("Bearer", get_access_token(url=url, username=username, password=password), sep = " "))) %>% - content(as="text") %>% - fromJSON(flatten=TRUE) %>% + df_n <- GET( + paste0(api_call_n), + add_headers( + Authorization = paste("Bearer", get_access_token( + url = url, + username = username, + password = password + ), sep = " "))) %>% + content(as = "text") %>% + fromJSON(flatten = TRUE) %>% unlist() %>% unname() #Import records in batches df <- tibble() batch_size <- batch_size # number of records to import per iteration - skip <-0 + skip <- 0 message("****************************") #Download records in batches, and then append them into a single dataset while (skip < df_n) { #Progress message - if (df_n <= batch_size) message(paste0("...downloading records 1 to ",df_n)) - if (df_n > batch_size) message(paste0("...downloading records ", as.character(skip+1, scientific = FALSE), " to ", format(skip+batch_size, scientific = FALSE))) + if (df_n <= batch_size) { + message(paste0("...downloading records 1 to ", df_n)) + } + if (df_n > batch_size) { + message( + paste0( + "...downloading records ", + as.character(skip + 1, scientific = FALSE), + " to ", + format(skip + batch_size, scientific = FALSE) + ) + ) + } #fetch the batch of records - df.i <- GET(paste0(api_call_get, - "?filter={%22limit%22:",format(batch_size, scientific = FALSE),",%22skip%22:",format(skip, scientific = FALSE),"}"), - add_headers(Authorization = paste("Bearer", get_access_token(url=url, username=username, password=password), sep = " "))) %>% - content(as='text') %>% - fromJSON( flatten=TRUE) %>% + df_i <- GET( + paste0( + api_call_get, + "?filter={%22limit%22:", + format(batch_size, scientific = FALSE), + ",%22skip%22:", + format(skip, scientific = FALSE), + "}" + ), + add_headers( + Authorization = paste("Bearer", get_access_token( + url = url, + username = username, + password = password), + sep = " ") + ) + ) %>% + content(as = "text") %>% + fromJSON(flatten = TRUE) %>% as_tibble() #append the new batch of records to the existing data frame df <- df %>% - bind_rows(df.i) + bind_rows(df_i) #update numbers for the next iteration skip <- skip + batch_size - rm(df.i) + rm(df_i) } rm(batch_size, skip, df_n) return(df) } - - From d75335fd3165755d978ed26a45651f20e5262d4c Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Fri, 20 Jan 2023 11:37:23 +0000 Subject: [PATCH 029/203] added tests for batch_downloader --- tests/testthat/test-batch_downloader.R | 157 +++++++++++++++++++++++++ 1 file changed, 157 insertions(+) create mode 100644 tests/testthat/test-batch_downloader.R diff --git a/tests/testthat/test-batch_downloader.R b/tests/testthat/test-batch_downloader.R new file mode 100644 index 0000000..e7da9d4 --- /dev/null +++ b/tests/testthat/test-batch_downloader.R @@ -0,0 +1,157 @@ +test_that("batch_downloader works as expected", { + skip("batch_downloader requires API call") + + api_call_n <- paste0(url, "api/outbreaks/", outbreak_id, "/cases/count") + api_call_get <- paste0(url, "api/outbreaks/", outbreak_id, "/cases") + res <- batch_downloader( + url = url, + username = username, + password = password, + api_call_n = api_call_n, + api_call_get = api_call_get, + batch_size = 50000 + ) + + + expect_s3_class(res, "tbl_df") + expect_s3_class(res, "data.frame") + expect_identical(dim(res), c(13L, 153L)) + expect_identical( + colnames(res), + c("firstName", "gender", "wasContact", "safeBurial", "classification", + "investigationStatus", "riskLevel", "transferRefused", "vaccinesReceived", + "id", "outbreakId", "visualId", "lastName", "dob", "occupation", + "documents", "addresses", "dateOfReporting", + "isDateOfReportingApproximate", "dateOfLastContact", "dateOfOnset", + "dateRanges", "classificationHistory", "hasRelationships", + "relationshipsRepresentation", "numberOfExposures", "numberOfContacts", + "usualPlaceOfResidenceLocationId", "responsibleUserId", "createdAt", + "createdBy", "updatedAt", "updatedBy", "createdOn", "deleted", + "dateBecomeCase", "wasCase", "active", "followUpHistory", + "isDateOfOnsetApproximate", "outcomeId", "riskReason", "pregnancyStatus", + "dateOfOutcome", "dateOfInfection", "middleName", + "questionnaireAnswers.would_you_like_to_complete_who_basic_case_questionnaire", + "questionnaireAnswers.Case_WhichForm", + "questionnaireAnswers.report_test_reason", + "questionnaireAnswers.Comcond_present", + "questionnaireAnswers.patinfo_occuhcw", + "questionnaireAnswers.expo_travel", + "questionnaireAnswers.expo_visit_healthcare", + "questionnaireAnswers.patcourse", "questionnaireAnswers.Comcond_select", + "questionnaireAnswers.specify_places_and_dates_for_up_to_3_locations_below", + "questionnaireAnswers.expo_travel_country1", + "questionnaireAnswers.expo_travel_city1", + "questionnaireAnswers.expo_travel_date1", + "questionnaireAnswers.FA0_UniqueIDClustNumber", + "questionnaireAnswers.FA0_datacollector_name", + "questionnaireAnswers.FA0_datacollector_institution", + "questionnaireAnswers.FA0_datacollector_telephone", + "questionnaireAnswers.FA0_datacollector_email", + "questionnaireAnswers.FA0_caseidentifier_email", + "questionnaireAnswers.FA0_caseidentifier_socialnumber", + "questionnaireAnswers.FA0_case_countryresidence", + "questionnaireAnswers.FA0_respondent_ispatient", + "questionnaireAnswers.FA0_symptoms_caseshowssymptoms", + "questionnaireAnswers.FA0_respiratorysample_collectedYN", + "questionnaireAnswers.FA0_clinicalcomplications_ARDS", + "questionnaireAnswers.FA0_clinicalcomplications_pneumoniachestXray", + "questionnaireAnswers.FA0_symptom_fever", + "questionnaireAnswers.FA0_symptom_sorethroat", + "questionnaireAnswers.FA0_symptom_runnynose", + "questionnaireAnswers.FA0_symptom_cough", + "questionnaireAnswers.FA0_symptom_shortnessofbreath", + "questionnaireAnswers.FA0_symptom_vomiting", + "questionnaireAnswers.FA0_symptom_nausea", + "questionnaireAnswers.FA0_symptom_diarrhea", + "questionnaireAnswers.FA0_symptom_taste", "questionnaireAnswers.test", + "questionnaireAnswers.which_date_omicron_was_found", + "questionnaireAnswers.place_of_death", + "questionnaireAnswers.patinfo_occuhcw_country", + "questionnaireAnswers.patinfo_occuhcw_city", + "questionnaireAnswers.patinfo_occuhcw_name", + "questionnaireAnswers.country_2", + "questionnaireAnswers.city_2", "questionnaireAnswers.date_2", + "questionnaireAnswers.FA1_UniqueIDClustNumber", + "questionnaireAnswers.FA1_FurtherCaseClassification", + "questionnaireAnswers.FA1_datacollector_name", + "questionnaireAnswers.FA1_datacollector_institution", + "questionnaireAnswers.FA1_datacollector_telephone", + "questionnaireAnswers.FA1_datacollector_email", + "questionnaireAnswers.FA1_respondent_ispatient", + "questionnaireAnswers.FA1_caseidentifier_email", + "questionnaireAnswers.FA1_caseidentifier_socialnumber", + "questionnaireAnswers.FA1_case_countryresidence", + "questionnaireAnswers.FA1_case_nationality", + "questionnaireAnswers.FA1_case_ethnicity", + "questionnaireAnswers.FA1_case_responsiblehealthcentre", + "questionnaireAnswers.FA1_case_nurseryschoolcollege", + "questionnaireAnswers.FA1_carecentre_practicename", + "questionnaireAnswers.FA1_carecentre_treatingphysicianname", + "questionnaireAnswers.FA1_carecentre_casepartofoutbreak", + "questionnaireAnswers.FA1_carecentre_telephone", + "questionnaireAnswers.FA1_carecentre_fax", + "questionnaireAnswers.FA1_carecentre_address", + "questionnaireAnswers.FA1_symptoms_caseshowssymptoms", + "questionnaireAnswers.FA1_symptoms_healthfacilityvisitedYN", + "questionnaireAnswers.FA1_complications_mechanicalventilation", + "questionnaireAnswers.FA1_complications_ARDS", + "questionnaireAnswers.FA1_complications_acuterenalfailure", + "questionnaireAnswers.FA1_complications_cardiacfailure", + "questionnaireAnswers.FA1_complications_consumptivecoagulopathy", + "questionnaireAnswers.FA1_complications_pneumoniachestXray", + "questionnaireAnswers.FA1_complications_other", + "questionnaireAnswers.FA1_complications_EMOrequired", + "questionnaireAnswers.FA1_complications_hypotensionrequiringvasopressors", + "questionnaireAnswers.FA1_preexistingconditions_obesity", + "questionnaireAnswers.FA1_preexistingconditions_cancer", + "questionnaireAnswers.FA1_preexistingconditions_diabetes", + "questionnaireAnswers.FA1_preexistingconditions_HIVotherimmunedeficiency", + "questionnaireAnswers.FA1_preexistingconditions_heartdisease", + "questionnaireAnswers.FA1_preexistingconditions_asthmarequiringmedication", + "questionnaireAnswers.FA1_preexistingconditions_chroniclungdiseasenonasthma", + "questionnaireAnswers.FA1_preexistingconditions_chronicliverdisease", + "questionnaireAnswers.FA1_preexistingconditions_chronichaematologicaldisorder", + "questionnaireAnswers.FA1_preexistingconditions_chronickidneydisease", + "questionnaireAnswers.FA1_preexistingconditions_chronicneurological", + "questionnaireAnswers.FA1_preexistingconditions_organorbonemarrowrecipient", + "questionnaireAnswers.FA1_preexistingconditions_otherpreexistingcondition", + "questionnaireAnswers.FA1_healthcareinteractions_contactemergencynumber", + "questionnaireAnswers.FA1_priorXdayexposure_travelleddomestically", + "questionnaireAnswers.FA1_priorXdayexposure_travelledinternationally", + "questionnaireAnswers.FA1_priorXdayexposure_contactwithcase", + "questionnaireAnswers.FA1_priorXdayexposure_massgathering", + "questionnaireAnswers.FA1_priorXdayexposure_exposedtosimilarillness", + "questionnaireAnswers.FA1_priorXdayexposure_locationexposure", + "questionnaireAnswers.FA1_priorXdayexposure_inpatient", + "questionnaireAnswers.FA1_priorXdayexposure_outpatient", + "questionnaireAnswers.FA1_priorXdayexposure_traditionalhealer", + "questionnaireAnswers.FA1_formcompleted", "age.years", "age.months", + "duplicateKeys.name", "duplicateKeys.document", + "followUp.originalStartDate", "followUp.startDate", "followUp.endDate", + "followUp.status") + ) + + expect_identical( + unname(sapply(res[1, ], class)), + c("character", "character", "logical", "logical", "character", "character", + "character", "logical", "list", "character", "character", "character", + "character", "character", "character", "list", "list", "character", + "logical", "character", "character", "list", "list", "logical", "list", + "integer", "integer", "character", "character", "character", "character", + "character", "character", "character", "logical", "character", "logical", + "logical", "list", "logical", "character", "character", "character", + "character", "character", "character", "list", "list", "list", "list", + "list", "list", "list", "list", "list", "list", "list", "list", "list", + "list", "list", "list", "list", "list", "list", "list", "list", "list", + "list", "list", "list", "list", "list", "list", "list", "list", "list", + "list", "list", "list", "list", "list", "list", "list", "list", "list", + "list", "list", "list", "list", "list", "list", "list", "list", "list", + "list", "list", "list", "list", "list", "list", "list", "list", "list", + "list", "list", "list", "list", "list", "list", "list", "list", "list", + "list", "list", "list", "list", "list", "list", "list", "list", "list", + "list", "list", "list", "list", "list", "list", "list", "list", "list", + "list", "list", "list", "list", "list", "list", "list", "list", "list", + "list", "list", "list", "list", "list", "integer", "integer", "list", + "list", "character", "character", "character", "character") + ) +}) From 7749434c4a55411ce90c7bcb5b242a016f57e2f2 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Fri, 20 Jan 2023 11:38:39 +0000 Subject: [PATCH 030/203] updated batch_downloader documentation --- R/batch_downloader.R | 10 ++++++---- man/batch_downloader.Rd | 20 +++++++++++++------- 2 files changed, 19 insertions(+), 11 deletions(-) diff --git a/R/batch_downloader.R b/R/batch_downloader.R index 2051e66..b843b90 100644 --- a/R/batch_downloader.R +++ b/R/batch_downloader.R @@ -23,10 +23,12 @@ #' password <- "mypassword" #' outbreak_id <- "3b5554d7-2c19-41d0-b9af-475ad25a382b" #' -#' cases <- get_cases(url=url, -#' username=username, -#' password=password, -#' outbreak_id=outbreak_id) +#' cases <- get_cases( +#' url = url, +#' username = username, +#' password = password, +#' outbreak_id = outbreak_id +#' ) #' } #' @importFrom magrittr %>% #' @import dplyr diff --git a/man/batch_downloader.Rd b/man/batch_downloader.Rd index e70e3de..dc6b191 100644 --- a/man/batch_downloader.Rd +++ b/man/batch_downloader.Rd @@ -14,7 +14,8 @@ batch_downloader( ) } \arguments{ -\item{url}{Insert the base URL for your instance of Go.Data here. Don't forget the forward slash "/" at end!} +\item{url}{Insert the base URL for your instance of Go.Data here. Don't +forget the forward slash "/" at end!} \item{username}{The email address for your Go.Data login.} @@ -24,10 +25,13 @@ batch_downloader( \item{api_call_get}{The API url to GET the records.} -\item{batch_size}{Specifies the number of records to retrieve in each iteration.} +\item{batch_size}{Specifies the number of records to retrieve in each +iteration.} } \value{ -Returns a data frame. Some fields, such as addresses, hospitalization history, and questionnaire fields may require further unnesting. See \code{\link[tidyr]{nest}} for assitance with unnesting. +Returns a data frame. Some fields, such as addresses, hospitalization +history, and questionnaire fields may require further unnesting. See +\verb{\link[tidyr]\{nest\}} for assitance with unnesting. } \description{ A housekeeping function to do batch downloads. @@ -39,9 +43,11 @@ username <- "myemail@email.com" password <- "mypassword" outbreak_id <- "3b5554d7-2c19-41d0-b9af-475ad25a382b" -cases <- get_cases(url=url, - username=username, - password=password, - outbreak_id=outbreak_id) +cases <- get_cases( + url = url, + username = username, + password = password, + outbreak_id = outbreak_id +) } } From 984d667961ad904d54c72fed5105d717d314255f Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Fri, 20 Jan 2023 11:40:14 +0000 Subject: [PATCH 031/203] removed recursive argument defaults for batch_downloader --- R/batch_downloader.R | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/R/batch_downloader.R b/R/batch_downloader.R index b843b90..700f0ac 100644 --- a/R/batch_downloader.R +++ b/R/batch_downloader.R @@ -39,12 +39,12 @@ #' @importFrom purrr pluck #' #' -batch_downloader <- function(url = url, - username = username, - password = password, - api_call_n = api_call_n, - api_call_get = api_call_get, - batch_size = batch_size) { +batch_downloader <- function(url, + username, + password, + api_call_n, + api_call_get, + batch_size) { #get total number of records df_n <- GET( From 0d54ffbaa05a0cc59d06f017ec061c115e778716 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Fri, 20 Jan 2023 11:58:14 +0000 Subject: [PATCH 032/203] removed pipes from batch_downloader and renamed some variables --- R/batch_downloader.R | 42 ++++++++++++++++++++++-------------------- 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/R/batch_downloader.R b/R/batch_downloader.R index 700f0ac..96bcb89 100644 --- a/R/batch_downloader.R +++ b/R/batch_downloader.R @@ -46,19 +46,19 @@ batch_downloader <- function(url, api_call_get, batch_size) { - #get total number of records - df_n <- GET( + num_record_request <- GET( paste0(api_call_n), add_headers( Authorization = paste("Bearer", get_access_token( url = url, username = username, password = password - ), sep = " "))) %>% - content(as = "text") %>% - fromJSON(flatten = TRUE) %>% - unlist() %>% - unname() + ), sep = " "))) + + num_record_content <- content(record_request, as = "text") + + num_records <- fromJSON(record_content, flatten = TRUE) + num_records <- records$count #Import records in batches df <- tibble() @@ -67,13 +67,13 @@ batch_downloader <- function(url, message("****************************") #Download records in batches, and then append them into a single dataset - while (skip < df_n) { + while (skip < num_records) { #Progress message - if (df_n <= batch_size) { - message(paste0("...downloading records 1 to ", df_n)) + if (num_records <= batch_size) { + message(paste0("...downloading records 1 to ", num_records)) } - if (df_n > batch_size) { + if (num_records > batch_size) { message( paste0( "...downloading records ", @@ -85,7 +85,7 @@ batch_downloader <- function(url, } #fetch the batch of records - df_i <- GET( + record_request <- GET( paste0( api_call_get, "?filter={%22limit%22:", @@ -101,19 +101,21 @@ batch_downloader <- function(url, password = password), sep = " ") ) - ) %>% - content(as = "text") %>% - fromJSON(flatten = TRUE) %>% - as_tibble() + ) + + record_content <- content(cases_request, as = "text") + + records <- fromJSON(cases_content, flatten = TRUE) + + records <- as_tibble(cases) #append the new batch of records to the existing data frame - df <- df %>% - bind_rows(df_i) + df <- bind_rows(df, records) #update numbers for the next iteration skip <- skip + batch_size - rm(df_i) + rm(records) } - rm(batch_size, skip, df_n) + rm(batch_size, skip, num_records) return(df) } From 8a9424feb66bfeb3a087c595201a9293e3267c17 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Fri, 20 Jan 2023 12:03:29 +0000 Subject: [PATCH 033/203] used explicit namespace instead of import for batch_downloader --- R/batch_downloader.R | 31 +++++++++++-------------------- 1 file changed, 11 insertions(+), 20 deletions(-) diff --git a/R/batch_downloader.R b/R/batch_downloader.R index 96bcb89..6ea5ba0 100644 --- a/R/batch_downloader.R +++ b/R/batch_downloader.R @@ -30,15 +30,6 @@ #' outbreak_id = outbreak_id #' ) #' } -#' @importFrom magrittr %>% -#' @import dplyr -#' @import tidyr -#' @import httr -#' @import tibble -#' @importFrom jsonlite fromJSON -#' @importFrom purrr pluck -#' -#' batch_downloader <- function(url, username, password, @@ -46,22 +37,22 @@ batch_downloader <- function(url, api_call_get, batch_size) { - num_record_request <- GET( + num_record_request <- httr::GET( paste0(api_call_n), - add_headers( + httr::add_headers( Authorization = paste("Bearer", get_access_token( url = url, username = username, password = password ), sep = " "))) - num_record_content <- content(record_request, as = "text") + num_record_content <- httr::content(record_request, as = "text") - num_records <- fromJSON(record_content, flatten = TRUE) + num_records <- jsonlite::fromJSON(record_content, flatten = TRUE) num_records <- records$count #Import records in batches - df <- tibble() + df <- tibble::tibble() batch_size <- batch_size # number of records to import per iteration skip <- 0 message("****************************") @@ -85,7 +76,7 @@ batch_downloader <- function(url, } #fetch the batch of records - record_request <- GET( + record_request <- httr::GET( paste0( api_call_get, "?filter={%22limit%22:", @@ -94,7 +85,7 @@ batch_downloader <- function(url, format(skip, scientific = FALSE), "}" ), - add_headers( + httr::add_headers( Authorization = paste("Bearer", get_access_token( url = url, username = username, @@ -103,14 +94,14 @@ batch_downloader <- function(url, ) ) - record_content <- content(cases_request, as = "text") + record_content <- httr::content(cases_request, as = "text") - records <- fromJSON(cases_content, flatten = TRUE) + records <- jsonlite::fromJSON(cases_content, flatten = TRUE) - records <- as_tibble(cases) + records <- tibble::as_tibble(cases) #append the new batch of records to the existing data frame - df <- bind_rows(df, records) + df <- dplyr::bind_rows(df, records) #update numbers for the next iteration skip <- skip + batch_size From a974cfbfb75b23f48e1bfa403516cc51e2364b8f Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Fri, 20 Jan 2023 12:06:38 +0000 Subject: [PATCH 034/203] removed manual removing of variables in batch_downloader --- R/batch_downloader.R | 1 - 1 file changed, 1 deletion(-) diff --git a/R/batch_downloader.R b/R/batch_downloader.R index 6ea5ba0..9d4dd35 100644 --- a/R/batch_downloader.R +++ b/R/batch_downloader.R @@ -107,6 +107,5 @@ batch_downloader <- function(url, skip <- skip + batch_size rm(records) } - rm(batch_size, skip, num_records) return(df) } From f2165f8f68748344ef528dfd2a45ee21a238d37a Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Fri, 20 Jan 2023 12:12:05 +0000 Subject: [PATCH 035/203] set to NULL rather than rm variable in batch_downloader --- R/batch_downloader.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/batch_downloader.R b/R/batch_downloader.R index 9d4dd35..4152b7b 100644 --- a/R/batch_downloader.R +++ b/R/batch_downloader.R @@ -105,7 +105,7 @@ batch_downloader <- function(url, #update numbers for the next iteration skip <- skip + batch_size - rm(records) + records <- NULL } return(df) } From a5ce035a19ab09b05c826bd2be08551038492da3 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Fri, 20 Jan 2023 12:13:00 +0000 Subject: [PATCH 036/203] updated documentation for batch_downloader --- man/batch_downloader.Rd | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/man/batch_downloader.Rd b/man/batch_downloader.Rd index dc6b191..c1f83c7 100644 --- a/man/batch_downloader.Rd +++ b/man/batch_downloader.Rd @@ -4,14 +4,7 @@ \alias{batch_downloader} \title{Function to manage batch downloads} \usage{ -batch_downloader( - url = url, - username = username, - password = password, - api_call_n = api_call_n, - api_call_get = api_call_get, - batch_size = batch_size -) +batch_downloader(url, username, password, api_call_n, api_call_get, batch_size) } \arguments{ \item{url}{Insert the base URL for your instance of Go.Data here. Don't From 7e04fbc8feb6b82a68ce1f68f670ead206538d6b Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Fri, 20 Jan 2023 12:17:34 +0000 Subject: [PATCH 037/203] fixed variable names in batch_downloader --- R/batch_downloader.R | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/R/batch_downloader.R b/R/batch_downloader.R index 4152b7b..551fa90 100644 --- a/R/batch_downloader.R +++ b/R/batch_downloader.R @@ -46,10 +46,10 @@ batch_downloader <- function(url, password = password ), sep = " "))) - num_record_content <- httr::content(record_request, as = "text") + num_record_content <- httr::content(num_record_request, as = "text") - num_records <- jsonlite::fromJSON(record_content, flatten = TRUE) - num_records <- records$count + num_records <- jsonlite::fromJSON(num_record_content, flatten = TRUE) + num_records <- num_records$count #Import records in batches df <- tibble::tibble() @@ -94,11 +94,11 @@ batch_downloader <- function(url, ) ) - record_content <- httr::content(cases_request, as = "text") + record_content <- httr::content(record_request, as = "text") - records <- jsonlite::fromJSON(cases_content, flatten = TRUE) + records <- jsonlite::fromJSON(record_content, flatten = TRUE) - records <- tibble::as_tibble(cases) + records <- tibble::as_tibble(records) #append the new batch of records to the existing data frame df <- dplyr::bind_rows(df, records) From 2a0ab13adb72c00d90fe95dc3c9d527208b42afa Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Fri, 20 Jan 2023 12:19:14 +0000 Subject: [PATCH 038/203] removed redundant assignment in batch_downloader --- R/batch_downloader.R | 1 - 1 file changed, 1 deletion(-) diff --git a/R/batch_downloader.R b/R/batch_downloader.R index 551fa90..50fd60f 100644 --- a/R/batch_downloader.R +++ b/R/batch_downloader.R @@ -53,7 +53,6 @@ batch_downloader <- function(url, #Import records in batches df <- tibble::tibble() - batch_size <- batch_size # number of records to import per iteration skip <- 0 message("****************************") From 7ae103bde3362c3a9ac2a1e197bcf3930e88ed4c Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Fri, 20 Jan 2023 13:45:25 +0000 Subject: [PATCH 039/203] linted export_downloader --- R/export_downloader.R | 125 ++++++++++++++++++++++++++++++++---------- 1 file changed, 96 insertions(+), 29 deletions(-) diff --git a/R/export_downloader.R b/R/export_downloader.R index 8bfa28a..b122c90 100644 --- a/R/export_downloader.R +++ b/R/export_downloader.R @@ -2,15 +2,21 @@ #' #' A housekeeping function to do export requests & downloads. #' -#' @param url Insert the base URL for your instance of Go.Data here. Don't forget the forward slash "/" at end! +#' @param url Insert the base URL for your instance of Go.Data here. Don't +#' forget the forward slash "/" at end! #' @param username The email address for your Go.Data login. #' @param password The password for your Go.Data login #' @param api_call_request The API url to get the number of records. -#' @param wait The number of seconds to wait in between iterations of checking the status of the export. -#' @param file.type Whether the resulting data frame should contain nested fields (`file.type="json"`, the default) or an entirely flat data structure (`file.type="csv"`) - +#' @param wait The number of seconds to wait in between iterations of checking +#' the status of the export. +#' @param file.type Whether the resulting data frame should contain nested +#' fields (`file.type = "json"`, the default) or an entirely flat data structure +#' (`file.type = "csv"`) +#' #' @return -#' Returns a data frame. Some fields, such as addresses, hospitalization history, and questionnaire fields may require further unnesting. See \code{\link[tidyr]{nest}} for assitance with unnesting. +#' Returns a data frame. Some fields, such as addresses, hospitalization +#' history, and questionnaire fields may require further unnesting. See +#' `\link[tidyr]{nest}` for assitance with unnesting. #' #' @examples #' \dontrun{ @@ -19,10 +25,12 @@ #' password <- "mypassword" #' outbreak_id <- "3b5554d7-2c19-41d0-b9af-475ad25a382b" #' -#' cases <- get_cases(url=url, -#' username=username, -#' password=password, -#' outbreak_id=outbreak_id) +#' cases <- get_cases( +#' url = url, +#' username = username, +#' password = password, +#' outbreak_id = outbreak_id +#' ) #' } #' @importFrom magrittr %>% #' @import dplyr @@ -32,8 +40,6 @@ #' @importFrom jsonlite fromJSON #' @importFrom purrr pluck #' @importFrom utils read.csv -#' -#' export_downloader <- function(url = url, username = username, password = password, @@ -41,37 +47,100 @@ export_downloader <- function(url = url, wait = wait, file.type = file.type) { - request_id <- GET(paste0(api_call_request, - "?filter=%7B%22where%22%3A%7B%22useDbColumns%22%3A%22true%22%2C%20%22dontTranslateValues%22%3A%22true%22%2C%20%22jsonReplaceUndefinedWithNull%22%3A%22true%22%20%7D%7D", - "&type=",file.type, - "&access_token=",get_access_token(url=url, username=username, password=password))) %>% + request_id <- GET( + paste0( + api_call_request, + "?filter=%7B%22where%22%3A%7B%22useDbColumns%22%3A%22true%22%2C%20%22", + "dontTranslateValues%22%3A%22true%22%2C%20%22", + "jsonReplaceUndefinedWithNull%22%3A%22true%22%20%7D%7D", + "&type=", + file.type, + "&access_token=", + get_access_token( + url = url, + username = username, + password = password + ) + ) + ) %>% content() %>% pluck("exportLogId") #Check status of request periodcially, until finished - #function argument 'wait' determines the number of seconds to wait between iterations - export.request.status <- get_export_status(url=url, username=username, password=password, request_id=request_id) + #function argument 'wait' determines the number of seconds to wait between + #iterations + export_request_status <- get_export_status( + url = url, + username = username, + password = password, + request_id = request_id + ) - while(export.request.status$statusStep != "LNG_STATUS_STEP_EXPORT_FINISHED") { + status_step <- export_request_status$statusStep + while (status_step != "LNG_STATUS_STEP_EXPORT_FINISHED") { Sys.sleep(wait) - export.request.status <- GET(paste0(url,"api/export-logs/",request_id,"?access_token=",get_access_token(url=url, username=username, password=password))) %>% + export_request_status <- GET( + paste0( + url, + "api/export-logs/", + request_id, + "?access_token=", + get_access_token( + url = url, + username = username, + password = password + ) + ) + ) %>% content() - message(paste0("...processed ",export.request.status$processedNo, " of ", export.request.status$totalNo, " records")) + message( + paste0( + "...processed ", + export_request_status$processedNo, + " of ", + export_request_status$totalNo, + " records" + ) + ) } #Download the export message("...beginning download") - if (file.type=="json") { - df <- GET(paste0(url,"api/export-logs/",request_id,"/download?access_token=",get_access_token(url=url, username=username, password=password))) %>% - content("text", encoding="UTF-8") %>% - fromJSON(flatten=TRUE) + if (file.type == "json") { + df <- GET( + paste0( + url, + "api/export-logs/", + request_id, + "/download?access_token=", + get_access_token( + url = url, + username = username, + password = password + ) + ) + ) %>% + content("text", encoding = "UTF-8") %>% + fromJSON(flatten = TRUE) # fix one strange variable name names(df)[names(df) %in% "_id"] <- "id" - } else if (file.type=="csv") { - df <- GET(paste0(url,"api/export-logs/",request_id,"/download?access_token=",get_access_token(url=url, username=username, password=password))) %>% - content("text", encoding="UTF-8") %>% + } else if (file.type == "csv") { + df <- GET( + paste0( + url, + "api/export-logs/", + request_id, + "/download?access_token=", + get_access_token( + url = url, + username = username, + password = password + ) + ) + ) %>% + content("text", encoding = "UTF-8") %>% textConnection() %>% read.csv() names(df)[names(df) %in% "X_id"] <- "id" @@ -80,5 +149,3 @@ export_downloader <- function(url = url, message("...download complete!") return(df) } - - From 2fb3a39cb9515a0ca4584116d2e43ffb37e4a095 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Fri, 20 Jan 2023 13:46:01 +0000 Subject: [PATCH 040/203] added tests for export_downloader (skipped by default) --- tests/testthat/test-export_downloader.R | 96 +++++++++++++++++++++++++ 1 file changed, 96 insertions(+) create mode 100644 tests/testthat/test-export_downloader.R diff --git a/tests/testthat/test-export_downloader.R b/tests/testthat/test-export_downloader.R new file mode 100644 index 0000000..90b02c4 --- /dev/null +++ b/tests/testthat/test-export_downloader.R @@ -0,0 +1,96 @@ +test_that("export_downloader works as expected", { + skip("export_downloader requires API call") + + api_call_request <- paste0( + url, "api/outbreaks/", outbreak_id, "/cases/export" + ) + res <- export_downloader( + url = url, + username = username, + password = password, + api_call_request = api_call_request, + wait = 2, + file.type = "json" + ) + + expect_s3_class(res, "data.frame") + expect_identical(dim(res), c(13L, 357L)) + expect_true( + all(c( + "id", "visualId", "dateOfReporting", "isDateOfReportingApproximate", + "createdAt", "createdBy", "updatedAt", "updatedBy", "deleted", + "deletedAt", "createdOn", "firstName", "middleName", "lastName", "gender", + "occupation", "dob", "classification", "wasContact", "dateBecomeCase", + "wasCase", "dateOfInfection", "dateOfOnset", "riskLevel", "riskReason", + "outcomeId", "dateOfOutcome", "documents", "type", "dateRanges", + "transferRefused", "addresses", "safeBurial", "dateOfBurial", + "isDateOfOnsetApproximate", "numberOfExposures", "numberOfContacts", + "burialLocationId", "burialLocationId Identifiers", + "burialLocationId Location geographical level", + "burialLocationId Parent location", "burialPlaceName", + "investigationStatus", "dateInvestigationCompleted", "vaccinesReceived", + "pregnancyStatus", "responsibleUserId", "age.years", "age.months" + ) %in% colnames(res)) + ) + + expect_true( + all(grepl(pattern = "^questionnaireAnswers", x = colnames(res)[50:357])) + ) + + expect_identical( + unname(sapply(res[1, ], class)), + c("character", "character", "character", "logical", "character", + "character", "character", "character", "logical", "logical", "character", + "character", "character", "character", "character", "character", + "character", "character", "logical", "character", "logical", "character", + "character", "character", "character", "character", "character", "list", + "character", "list", "logical", "list", "logical", "logical", "logical", + "integer", "integer", "logical", "list", "list", "list", "logical", + "character", "logical", "list", "character", "character", "integer", + "integer", "list", "list", "list", "list", "list", "list", "list", "list", + "list", "list", "list", "list", "list", "list", "list", "list", "list", + "list", "list", "list", "list", "list", "list", "list", "list", "list", + "list", "list", "list", "list", "list", "list", "list", "list", "list", + "logical", "logical", "logical", "logical", "logical", "logical", + "logical", "logical", "logical", "logical", "list", "list", "list", + "list", "list", "list", "logical", "logical", "logical", "logical", + "logical", "logical", "logical", "logical", "logical", "logical", + "logical", "logical", "logical", "logical", "logical", "logical", + "logical", "logical", "logical", "logical", "logical", "logical", + "logical", "logical", "logical", "logical", "logical", "logical", + "logical", "logical", "logical", "logical", "logical", "logical", + "logical", "logical", "logical", "logical", "logical", "logical", + "logical", "logical", "logical", "logical", "logical", "logical", + "logical", "logical", "logical", "logical", "logical", "logical", + "logical", "logical", "logical", "logical", "logical", "logical", + "logical", "logical", "logical", "logical", "logical", "logical", + "logical", "logical", "logical", "logical", "list", "list", "list", + "list", "list", "list", "list", "logical", "logical", "logical", + "logical", "logical", "logical", "logical", "list", "list", "list", + "list", "list", "list", "list", "list", "list", "list", "logical", + "list", "list", "list", "list", "logical", "logical", "logical", + "logical", "logical", "logical", "logical", "logical", "logical", + "logical", "logical", "logical", "logical", "logical", "logical", + "logical", "logical", "logical", "logical", "logical", "logical", + "logical", "logical", "logical", "logical", "logical", "logical", + "logical", "list", "logical", "logical", "list", "logical", "logical", + "logical", "list", "logical", "list", "logical", "list", "logical", + "list", "logical", "list", "logical", "list", "logical", "list", "list", + "list", "list", "list", "list", "list", "list", "list", "list", "list", + "list", "list", "list", "list", "logical", "list", "logical", "list", + "logical", "logical", "logical", "logical", "list", "logical", "logical", + "logical", "logical", "list", "logical", "list", "logical", "list", + "list", "logical", "list", "logical", "list", "logical", "list", + "logical", "list", "logical", "logical", "logical", "logical", "logical", + "logical", "logical", "logical", "logical", "logical", "logical", + "logical", "logical", "logical", "logical", "logical", "logical", + "logical", "logical", "logical", "logical", "logical", "logical", + "logical", "logical", "logical", "logical", "logical", "logical", + "logical", "logical", "logical", "logical", "logical", "logical", + "logical", "logical", "logical", "logical", "logical", "logical", + "logical", "logical", "logical", "logical", "logical", "logical", + "logical", "logical", "logical", "logical", "logical", "logical", + "logical", "logical", "logical", "logical", "logical", "logical", + "logical", "logical", "logical", "logical", "logical", "logical", + "logical", "logical", "logical", "list", "list", "list")) +}) From 4751cc5d21d2fb7f50229bd51635b2f85209c909 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Fri, 20 Jan 2023 13:46:19 +0000 Subject: [PATCH 041/203] updated documentation for export_downloader --- man/export_downloader.Rd | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/man/export_downloader.Rd b/man/export_downloader.Rd index 2558cc4..529151c 100644 --- a/man/export_downloader.Rd +++ b/man/export_downloader.Rd @@ -14,7 +14,8 @@ export_downloader( ) } \arguments{ -\item{url}{Insert the base URL for your instance of Go.Data here. Don't forget the forward slash "/" at end!} +\item{url}{Insert the base URL for your instance of Go.Data here. Don't +forget the forward slash "/" at end!} \item{username}{The email address for your Go.Data login.} @@ -22,12 +23,17 @@ export_downloader( \item{api_call_request}{The API url to get the number of records.} -\item{wait}{The number of seconds to wait in between iterations of checking the status of the export.} +\item{wait}{The number of seconds to wait in between iterations of checking +the status of the export.} -\item{file.type}{Whether the resulting data frame should contain nested fields (\code{file.type="json"}, the default) or an entirely flat data structure (\code{file.type="csv"})} +\item{file.type}{Whether the resulting data frame should contain nested +fields (\code{file.type = "json"}, the default) or an entirely flat data structure +(\code{file.type = "csv"})} } \value{ -Returns a data frame. Some fields, such as addresses, hospitalization history, and questionnaire fields may require further unnesting. See \code{\link[tidyr]{nest}} for assitance with unnesting. +Returns a data frame. Some fields, such as addresses, hospitalization +history, and questionnaire fields may require further unnesting. See +\verb{\link[tidyr]\{nest\}} for assitance with unnesting. } \description{ A housekeeping function to do export requests & downloads. @@ -39,9 +45,11 @@ username <- "myemail@email.com" password <- "mypassword" outbreak_id <- "3b5554d7-2c19-41d0-b9af-475ad25a382b" -cases <- get_cases(url=url, - username=username, - password=password, - outbreak_id=outbreak_id) +cases <- get_cases( + url = url, + username = username, + password = password, + outbreak_id = outbreak_id +) } } From 45455fbfafdd437a69a43158682cac63a02949b8 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Fri, 20 Jan 2023 13:49:06 +0000 Subject: [PATCH 042/203] changes file.type argument to file_type in export_downloader --- R/export_downloader.R | 14 +++++++------- man/export_downloader.Rd | 8 ++++---- tests/testthat/test-export_downloader.R | 2 +- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/R/export_downloader.R b/R/export_downloader.R index b122c90..aaf9fb6 100644 --- a/R/export_downloader.R +++ b/R/export_downloader.R @@ -9,9 +9,9 @@ #' @param api_call_request The API url to get the number of records. #' @param wait The number of seconds to wait in between iterations of checking #' the status of the export. -#' @param file.type Whether the resulting data frame should contain nested -#' fields (`file.type = "json"`, the default) or an entirely flat data structure -#' (`file.type = "csv"`) +#' @param file_type Whether the resulting data frame should contain nested +#' fields (`file_type = "json"`, the default) or an entirely flat data structure +#' (`file_type = "csv"`) #' #' @return #' Returns a data frame. Some fields, such as addresses, hospitalization @@ -45,7 +45,7 @@ export_downloader <- function(url = url, password = password, api_call_request = api_call_request, wait = wait, - file.type = file.type) { + file_type = file_type) { request_id <- GET( paste0( @@ -54,7 +54,7 @@ export_downloader <- function(url = url, "dontTranslateValues%22%3A%22true%22%2C%20%22", "jsonReplaceUndefinedWithNull%22%3A%22true%22%20%7D%7D", "&type=", - file.type, + file_type, "&access_token=", get_access_token( url = url, @@ -107,7 +107,7 @@ export_downloader <- function(url = url, #Download the export message("...beginning download") - if (file.type == "json") { + if (file_type == "json") { df <- GET( paste0( url, @@ -126,7 +126,7 @@ export_downloader <- function(url = url, # fix one strange variable name names(df)[names(df) %in% "_id"] <- "id" - } else if (file.type == "csv") { + } else if (file_type == "csv") { df <- GET( paste0( url, diff --git a/man/export_downloader.Rd b/man/export_downloader.Rd index 529151c..0724c1d 100644 --- a/man/export_downloader.Rd +++ b/man/export_downloader.Rd @@ -10,7 +10,7 @@ export_downloader( password = password, api_call_request = api_call_request, wait = wait, - file.type = file.type + file_type = file_type ) } \arguments{ @@ -26,9 +26,9 @@ forget the forward slash "/" at end!} \item{wait}{The number of seconds to wait in between iterations of checking the status of the export.} -\item{file.type}{Whether the resulting data frame should contain nested -fields (\code{file.type = "json"}, the default) or an entirely flat data structure -(\code{file.type = "csv"})} +\item{file_type}{Whether the resulting data frame should contain nested +fields (\code{file_type = "json"}, the default) or an entirely flat data structure +(\code{file_type = "csv"})} } \value{ Returns a data frame. Some fields, such as addresses, hospitalization diff --git a/tests/testthat/test-export_downloader.R b/tests/testthat/test-export_downloader.R index 90b02c4..1a94114 100644 --- a/tests/testthat/test-export_downloader.R +++ b/tests/testthat/test-export_downloader.R @@ -10,7 +10,7 @@ test_that("export_downloader works as expected", { password = password, api_call_request = api_call_request, wait = 2, - file.type = "json" + file_type = "json" ) expect_s3_class(res, "data.frame") From 4e5d1c63d933bd278049a1ae4110099043294d6a Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Wed, 1 Feb 2023 14:48:01 +0000 Subject: [PATCH 043/203] removed recursive argument defaults from export_downloader --- R/export_downloader.R | 12 ++++++------ man/export_downloader.Rd | 9 +-------- 2 files changed, 7 insertions(+), 14 deletions(-) diff --git a/R/export_downloader.R b/R/export_downloader.R index aaf9fb6..d86adf5 100644 --- a/R/export_downloader.R +++ b/R/export_downloader.R @@ -40,12 +40,12 @@ #' @importFrom jsonlite fromJSON #' @importFrom purrr pluck #' @importFrom utils read.csv -export_downloader <- function(url = url, - username = username, - password = password, - api_call_request = api_call_request, - wait = wait, - file_type = file_type) { +export_downloader <- function(url, + username, + password, + api_call_request, + wait, + file_type) { request_id <- GET( paste0( diff --git a/man/export_downloader.Rd b/man/export_downloader.Rd index 0724c1d..8cc492d 100644 --- a/man/export_downloader.Rd +++ b/man/export_downloader.Rd @@ -4,14 +4,7 @@ \alias{export_downloader} \title{Function to manage export downloads} \usage{ -export_downloader( - url = url, - username = username, - password = password, - api_call_request = api_call_request, - wait = wait, - file_type = file_type -) +export_downloader(url, username, password, api_call_request, wait, file_type) } \arguments{ \item{url}{Insert the base URL for your instance of Go.Data here. Don't From 175ec5e9151d84991e62b1d1338ff7487468dc1a Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Wed, 1 Feb 2023 14:58:50 +0000 Subject: [PATCH 044/203] removed pipe from export_downloader --- R/export_downloader.R | 47 +++++++++++++++++++++++++------------------ 1 file changed, 27 insertions(+), 20 deletions(-) diff --git a/R/export_downloader.R b/R/export_downloader.R index d86adf5..56be3bc 100644 --- a/R/export_downloader.R +++ b/R/export_downloader.R @@ -41,13 +41,13 @@ #' @importFrom purrr pluck #' @importFrom utils read.csv export_downloader <- function(url, - username, - password, - api_call_request, - wait, - file_type) { + username, + password, + api_call_request, + wait, + file_type) { - request_id <- GET( + export_log_id_request <- GET( paste0( api_call_request, "?filter=%7B%22where%22%3A%7B%22useDbColumns%22%3A%22true%22%2C%20%22", @@ -62,10 +62,11 @@ export_downloader <- function(url, password = password ) ) - ) %>% - content() %>% - pluck("exportLogId") + ) + + export_log_id_request_content <- content(export_log_id_request) + request_id <- pluck(export_log_id_request_content, "exportLogId") #Check status of request periodcially, until finished #function argument 'wait' determines the number of seconds to wait between @@ -92,8 +93,9 @@ export_downloader <- function(url, password = password ) ) - ) %>% - content() + ) + + export_request_status_content <- content(export_request_status) message( paste0( "...processed ", @@ -108,7 +110,7 @@ export_downloader <- function(url, #Download the export message("...beginning download") if (file_type == "json") { - df <- GET( + df_request <- GET( paste0( url, "api/export-logs/", @@ -120,14 +122,16 @@ export_downloader <- function(url, password = password ) ) - ) %>% - content("text", encoding = "UTF-8") %>% - fromJSON(flatten = TRUE) + ) + + df_content <- content(df_request, "text", encoding = "UTF-8") + + df <- fromJSON(df_content, flatten = TRUE) # fix one strange variable name names(df)[names(df) %in% "_id"] <- "id" } else if (file_type == "csv") { - df <- GET( + df_request <- GET( paste0( url, "api/export-logs/", @@ -139,10 +143,13 @@ export_downloader <- function(url, password = password ) ) - ) %>% - content("text", encoding = "UTF-8") %>% - textConnection() %>% - read.csv() + ) + + df_content <- content(df_request, "text", encoding = "UTF-8") + + df_content <- textConnection(df_content) + + df <- read.csv(df_content) names(df)[names(df) %in% "X_id"] <- "id" } From 95349c5f022b72ff73d7138d2038e3295503566e Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Wed, 1 Feb 2023 14:59:30 +0000 Subject: [PATCH 045/203] change output type of export_downloader to tibble --- R/export_downloader.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/export_downloader.R b/R/export_downloader.R index 56be3bc..bb8d354 100644 --- a/R/export_downloader.R +++ b/R/export_downloader.R @@ -154,5 +154,6 @@ export_downloader <- function(url, } message("...download complete!") + df <- tibble::as_tibble(df) return(df) } From 957f18969d364f0b3d0cd20d85f62e46705c4c03 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Wed, 1 Feb 2023 15:02:53 +0000 Subject: [PATCH 046/203] used explicit namespace instead of import in export_downloader --- NAMESPACE | 1 - R/export_downloader.R | 30 +++++++++++------------------- 2 files changed, 11 insertions(+), 20 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 6c64574..efb977e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -42,4 +42,3 @@ import(urltools) importFrom(jsonlite,fromJSON) importFrom(magrittr,"%>%") importFrom(purrr,pluck) -importFrom(utils,read.csv) diff --git a/R/export_downloader.R b/R/export_downloader.R index bb8d354..f96628c 100644 --- a/R/export_downloader.R +++ b/R/export_downloader.R @@ -32,14 +32,6 @@ #' outbreak_id = outbreak_id #' ) #' } -#' @importFrom magrittr %>% -#' @import dplyr -#' @import tidyr -#' @import httr -#' @import tibble -#' @importFrom jsonlite fromJSON -#' @importFrom purrr pluck -#' @importFrom utils read.csv export_downloader <- function(url, username, password, @@ -47,7 +39,7 @@ export_downloader <- function(url, wait, file_type) { - export_log_id_request <- GET( + export_log_id_request <- httr::GET( paste0( api_call_request, "?filter=%7B%22where%22%3A%7B%22useDbColumns%22%3A%22true%22%2C%20%22", @@ -64,9 +56,9 @@ export_downloader <- function(url, ) ) - export_log_id_request_content <- content(export_log_id_request) + export_log_id_request_content <- httr::content(export_log_id_request) - request_id <- pluck(export_log_id_request_content, "exportLogId") + request_id <- purrr::pluck(export_log_id_request_content, "exportLogId") #Check status of request periodcially, until finished #function argument 'wait' determines the number of seconds to wait between @@ -81,7 +73,7 @@ export_downloader <- function(url, status_step <- export_request_status$statusStep while (status_step != "LNG_STATUS_STEP_EXPORT_FINISHED") { Sys.sleep(wait) - export_request_status <- GET( + export_request_status <- httr::GET( paste0( url, "api/export-logs/", @@ -95,7 +87,7 @@ export_downloader <- function(url, ) ) - export_request_status_content <- content(export_request_status) + export_request_status_content <- httr::content(export_request_status) message( paste0( "...processed ", @@ -110,7 +102,7 @@ export_downloader <- function(url, #Download the export message("...beginning download") if (file_type == "json") { - df_request <- GET( + df_request <- httr::GET( paste0( url, "api/export-logs/", @@ -124,14 +116,14 @@ export_downloader <- function(url, ) ) - df_content <- content(df_request, "text", encoding = "UTF-8") + df_content <- httr::content(df_request, "text", encoding = "UTF-8") - df <- fromJSON(df_content, flatten = TRUE) + df <- jsonlite::fromJSON(df_content, flatten = TRUE) # fix one strange variable name names(df)[names(df) %in% "_id"] <- "id" } else if (file_type == "csv") { - df_request <- GET( + df_request <- httr::GET( paste0( url, "api/export-logs/", @@ -145,11 +137,11 @@ export_downloader <- function(url, ) ) - df_content <- content(df_request, "text", encoding = "UTF-8") + df_content <- httr::content(df_request, "text", encoding = "UTF-8") df_content <- textConnection(df_content) - df <- read.csv(df_content) + df <- utils::read.csv(df_content) names(df)[names(df) %in% "X_id"] <- "id" } From ecfcab3083be01d9665864dbc3a03946415fcbea Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Wed, 1 Feb 2023 15:07:24 +0000 Subject: [PATCH 047/203] fixed variable name in export_downloader --- R/export_downloader.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/export_downloader.R b/R/export_downloader.R index f96628c..2c19c4f 100644 --- a/R/export_downloader.R +++ b/R/export_downloader.R @@ -87,7 +87,7 @@ export_downloader <- function(url, ) ) - export_request_status_content <- httr::content(export_request_status) + export_request_status <- httr::content(export_request_status) message( paste0( "...processed ", From 6d0edf14b9a476246b6db14dbecff94aa4f2dd81 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Wed, 1 Feb 2023 15:08:36 +0000 Subject: [PATCH 048/203] added expectation for tibble in export_downloader tests --- tests/testthat/test-export_downloader.R | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/testthat/test-export_downloader.R b/tests/testthat/test-export_downloader.R index 1a94114..424ecbb 100644 --- a/tests/testthat/test-export_downloader.R +++ b/tests/testthat/test-export_downloader.R @@ -13,6 +13,7 @@ test_that("export_downloader works as expected", { file_type = "json" ) + expect_s3_class(res, "tbl_df") expect_s3_class(res, "data.frame") expect_identical(dim(res), c(13L, 357L)) expect_true( From ac3865e5707673732b504902c961b57a1a48cd54 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Wed, 1 Feb 2023 15:14:41 +0000 Subject: [PATCH 049/203] removed recursive argument defaults for get_active_outbreak --- R/get_active_outbreak.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/get_active_outbreak.R b/R/get_active_outbreak.R index b28509f..7cd5420 100644 --- a/R/get_active_outbreak.R +++ b/R/get_active_outbreak.R @@ -27,9 +27,9 @@ #' password = password #' ) #' } -get_active_outbreak <- function(url = url, - username = username, - password = password) { +get_active_outbreak <- function(url, + username, + password) { # get request to go.data godata_url <- httr::GET( From 4a5801aad341457b5dc32c8753422ef13c15fcce Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Wed, 1 Feb 2023 15:17:33 +0000 Subject: [PATCH 050/203] removed recursive argument defaults for get_access_token --- R/get_access_token.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/get_access_token.R b/R/get_access_token.R index 73bff56..1260b4a 100644 --- a/R/get_access_token.R +++ b/R/get_access_token.R @@ -24,9 +24,9 @@ #' @importFrom jsonlite fromJSON #' @export -get_access_token <- function(url=url, - username=username, - password=password) { +get_access_token <- function(url, + username, + password) { response <- POST(url=paste0(url,"api/oauth/token?access_token=123"), body = list(username=username, password=password), From 7a2b775e6cde09ee1dd05444d5102276d30ebe17 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Wed, 1 Feb 2023 15:20:49 +0000 Subject: [PATCH 051/203] linted get_access_token --- R/get_access_token.R | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/R/get_access_token.R b/R/get_access_token.R index 1260b4a..05bc9cd 100644 --- a/R/get_access_token.R +++ b/R/get_access_token.R @@ -5,7 +5,8 @@ #' function used in many of the other #' `godataR` functions. #' -#' @param url Insert the base URL for your instance of Go.Data here. Don't forget the forward slash "/" at end! +#' @param url Insert the base URL for your instance of Go.Data here. Don't +#' forget the forward slash "/" at end! #' @param username The email address for your Go.Data login. #' @param password The password for your Go.Data login #' @@ -23,21 +24,19 @@ #' @import httr #' @importFrom jsonlite fromJSON #' @export - get_access_token <- function(url, username, password) { - response <- POST(url=paste0(url,"api/oauth/token?access_token=123"), - body = list(username=username, password=password), + response <- POST(url = paste0(url, "api/oauth/token?access_token=123"), + body = list(username = username, password = password), encode = "json") - if (response$status_code==200) { - responseJSON <- content(response, as="text") - token <- fromJSON(responseJSON, flatten=TRUE)$access_token + if (response$status_code == 200) { + response_json <- content(response, as = "text") + token <- fromJSON(response_json, flatten = TRUE)$access_token return(token) } else { - stop(paste0("Error: ",response$status_code)) + stop(paste0("Error: ", response$status_code)) } - } From 26e9a281d357c17df8095c9b6408e41677bc0ccf Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Wed, 1 Feb 2023 15:23:31 +0000 Subject: [PATCH 052/203] use explicit namespace instead of import for get_access_token --- R/get_access_token.R | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/R/get_access_token.R b/R/get_access_token.R index 05bc9cd..2b055cc 100644 --- a/R/get_access_token.R +++ b/R/get_access_token.R @@ -21,22 +21,22 @@ #' username = username, #' password = password) #' } -#' @import httr -#' @importFrom jsonlite fromJSON #' @export get_access_token <- function(url, username, password) { - response <- POST(url = paste0(url, "api/oauth/token?access_token=123"), - body = list(username = username, password = password), - encode = "json") + response <- httr::POST( + url = paste0(url, "api/oauth/token?access_token=123"), + body = list(username = username, password = password), + encode = "json" + ) if (response$status_code == 200) { - response_json <- content(response, as = "text") - token <- fromJSON(response_json, flatten = TRUE)$access_token + response_json <- httr::content(response, as = "text") + token <- jsonlite::fromJSON(response_json, flatten = TRUE)$access_token return(token) } else { - stop(paste0("Error: ", response$status_code)) + stop("Error: ", response$status_code) } } From 960cca8736539bae97d847ca8197ece97cc67547 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Wed, 1 Feb 2023 15:27:49 +0000 Subject: [PATCH 053/203] added test for get_access_token (skipped by default) --- tests/testthat/test-get_access_token.R | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 tests/testthat/test-get_access_token.R diff --git a/tests/testthat/test-get_access_token.R b/tests/testthat/test-get_access_token.R new file mode 100644 index 0000000..d342755 --- /dev/null +++ b/tests/testthat/test-get_access_token.R @@ -0,0 +1,18 @@ +test_that("get_access_token works as expected", { + skip("get_access_token requires API call") + + res <- get_access_token( + url = url, + username = username, + password = password + ) + + expect_type(res, "character") + # character string can contain alphanumeric characters + expect_true(grepl(pattern = "[:alphanum:]", x = res)) + # character string cannot contain punctuation marks + expect_false(grepl(pattern = "[[:punct:]]", x = res)) + # character string cannot contain spaces + expect_false(grepl(pattern = "\\s", x = res)) +}) + From a4d47a4d6135a4723a979678221508d2fceb81a7 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Wed, 1 Feb 2023 15:53:06 +0000 Subject: [PATCH 054/203] linted get_export_status --- R/get_export_status.R | 69 ++++++++++++++++++++++++++++++------------- 1 file changed, 48 insertions(+), 21 deletions(-) diff --git a/R/get_export_status.R b/R/get_export_status.R index 2daf647..4d621a9 100644 --- a/R/get_export_status.R +++ b/R/get_export_status.R @@ -4,7 +4,8 @@ #' request. This is a housekeeping function #' used in many of the other `godataR` functions. #' -#' @param url Insert the base URL for your instance of Go.Data here. Don't forget the forward slash "/" at end! +#' @param url Insert the base URL for your instance of Go.Data here. Don't +#' forget the forward slash "/" at end! #' @param username The email address for your Go.Data login. #' @param password The password for your Go.Data login #' @param request_id The id number for the export request. @@ -18,23 +19,35 @@ #' username <- "myemail@email.com" #' password <- "mypassword" #' outbreak_id <- "3b5554d7-2c19-41d0-b9af-475ad25a382b" -#' access_token <- get_access_token(url=url, -#' username=username, -#' password=password) +#' access_token <- get_access_token( +#' url = url, +#' username = username, +#' password = password +#' ) #' -#' #Submit an export request -#' export.request <- GET(paste0(url,"api/outbreaks/",outbreak_id,"/cases/export", -#' "&access_token=",access_token)) +#' # Submit an export request +#' export.request <- GET( +#' paste0( +#' url, +#' "api/outbreaks/", +#' outbreak_id, +#' "/cases/export", +#' "&access_token=", +#' access_token +#' ) +#' ) #' request_id <- export.request %>% -#' content() %>% -#' pluck("exportLogId") +#' content() %>% +#' pluck("exportLogId") #' -#' #Check the status of the export request +#' # Check the status of the export request #' -#' export.request.status <- get_export_status(url=url, -#' username=username, -#' password=password, -#' request_id=request_id) +#' export_request_status <- get_export_status( +#' url = url, +#' username = username, +#' password = password, +#' request_id = request_id +#' ) #' } #' @importFrom magrittr %>% #' @import dplyr @@ -43,16 +56,30 @@ #' @importFrom jsonlite fromJSON #' @importFrom purrr pluck -get_export_status <- function(url=url, - username=username, - password=password, - request_id=request_id) { +get_export_status <- function(url = url, + username = username, + password = password, + request_id = request_id) { - export.request.status <- GET(paste0(url,"api/export-logs/",request_id,"?access_token=",get_access_token(url=url, username=username, password=password))) %>% + export_request_status <- GET( + paste0( + url, + "api/export-logs/", + request_id, + "?access_token=", + get_access_token( + url = url, + username = username, + password = password + ) + ) + ) %>% content() - export.request.status <- export.request.status[c("statusStep","totalNo","processedNo")] + export_request_status <- export_request_status[ + c("statusStep", "totalNo", "processedNo") + ] - return(export.request.status) + return(export_request_status) } From 3c7ff143befb889313dc5fbda6dade56eeb82341 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Wed, 1 Feb 2023 15:53:45 +0000 Subject: [PATCH 055/203] removed recursive argument defaults for get_export_status --- R/get_export_status.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/get_export_status.R b/R/get_export_status.R index 4d621a9..cec9564 100644 --- a/R/get_export_status.R +++ b/R/get_export_status.R @@ -56,10 +56,10 @@ #' @importFrom jsonlite fromJSON #' @importFrom purrr pluck -get_export_status <- function(url = url, - username = username, - password = password, - request_id = request_id) { +get_export_status <- function(url, + username, + password, + request_id) { export_request_status <- GET( paste0( From 4cc6abf1a752bc44966ebca385eaa2af50c3d0c3 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Wed, 1 Feb 2023 15:57:00 +0000 Subject: [PATCH 056/203] removed pipe from get_export_status --- R/get_export_status.R | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/R/get_export_status.R b/R/get_export_status.R index cec9564..7b1e5ba 100644 --- a/R/get_export_status.R +++ b/R/get_export_status.R @@ -26,7 +26,7 @@ #' ) #' #' # Submit an export request -#' export.request <- GET( +#' export_request <- GET( #' paste0( #' url, #' "api/outbreaks/", @@ -36,9 +36,8 @@ #' access_token #' ) #' ) -#' request_id <- export.request %>% -#' content() %>% -#' pluck("exportLogId") +#' request_id <- content(export_request) +#' request_id <- pluck(request_id, "exportLogId") #' #' # Check the status of the export request #' @@ -73,8 +72,8 @@ get_export_status <- function(url, password = password ) ) - ) %>% - content() + ) + export_request_status <- content(export_request_status) export_request_status <- export_request_status[ c("statusStep", "totalNo", "processedNo") From 7c6500d1f13d61c34abc6ccadfe549ea2455c099 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Wed, 1 Feb 2023 15:59:15 +0000 Subject: [PATCH 057/203] used explicit namespace instead of import for get_export_status --- R/get_export_status.R | 12 ++-------- man/get_access_token.Rd | 5 ++-- man/get_active_outbreak.Rd | 2 +- man/get_export_status.Rd | 49 ++++++++++++++++++++++---------------- 4 files changed, 34 insertions(+), 34 deletions(-) diff --git a/R/get_export_status.R b/R/get_export_status.R index 7b1e5ba..a400b0c 100644 --- a/R/get_export_status.R +++ b/R/get_export_status.R @@ -48,19 +48,12 @@ #' request_id = request_id #' ) #' } -#' @importFrom magrittr %>% -#' @import dplyr -#' @import tidyr -#' @import httr -#' @importFrom jsonlite fromJSON -#' @importFrom purrr pluck - get_export_status <- function(url, username, password, request_id) { - export_request_status <- GET( + export_request_status <- httr::GET( paste0( url, "api/export-logs/", @@ -73,12 +66,11 @@ get_export_status <- function(url, ) ) ) - export_request_status <- content(export_request_status) + export_request_status <- httr::content(export_request_status) export_request_status <- export_request_status[ c("statusStep", "totalNo", "processedNo") ] return(export_request_status) - } diff --git a/man/get_access_token.Rd b/man/get_access_token.Rd index d7df5a4..f504a36 100644 --- a/man/get_access_token.Rd +++ b/man/get_access_token.Rd @@ -4,10 +4,11 @@ \alias{get_access_token} \title{Get an access oauth access token for Go.Data} \usage{ -get_access_token(url = url, username = username, password = password) +get_access_token(url, username, password) } \arguments{ -\item{url}{Insert the base URL for your instance of Go.Data here. Don't forget the forward slash "/" at end!} +\item{url}{Insert the base URL for your instance of Go.Data here. Don't +forget the forward slash "/" at end!} \item{username}{The email address for your Go.Data login.} diff --git a/man/get_active_outbreak.Rd b/man/get_active_outbreak.Rd index 185500c..20d79e8 100644 --- a/man/get_active_outbreak.Rd +++ b/man/get_active_outbreak.Rd @@ -4,7 +4,7 @@ \alias{get_active_outbreak} \title{Get the currently active outbreak id number} \usage{ -get_active_outbreak(url = url, username = username, password = password) +get_active_outbreak(url, username, password) } \arguments{ \item{url}{Insert the base URL for your instance of Go.Data here. Don't diff --git a/man/get_export_status.Rd b/man/get_export_status.Rd index 47e7a51..b891e7d 100644 --- a/man/get_export_status.Rd +++ b/man/get_export_status.Rd @@ -4,15 +4,11 @@ \alias{get_export_status} \title{Check the status of an export request from Go.Data (version 2.38.1 or later)} \usage{ -get_export_status( - url = url, - username = username, - password = password, - request_id = request_id -) +get_export_status(url, username, password, request_id) } \arguments{ -\item{url}{Insert the base URL for your instance of Go.Data here. Don't forget the forward slash "/" at end!} +\item{url}{Insert the base URL for your instance of Go.Data here. Don't +forget the forward slash "/" at end!} \item{username}{The email address for your Go.Data login.} @@ -34,22 +30,33 @@ url <- "https://MyGoDataServer.com/" username <- "myemail@email.com" password <- "mypassword" outbreak_id <- "3b5554d7-2c19-41d0-b9af-475ad25a382b" -access_token <- get_access_token(url=url, - username=username, - password=password) +access_token <- get_access_token( + url = url, + username = username, + password = password +) -#Submit an export request -export.request <- GET(paste0(url,"api/outbreaks/",outbreak_id,"/cases/export", - "&access_token=",access_token)) -request_id <- export.request \%>\% - content() \%>\% - pluck("exportLogId") +# Submit an export request +export_request <- GET( + paste0( + url, + "api/outbreaks/", + outbreak_id, + "/cases/export", + "&access_token=", + access_token + ) +) +request_id <- content(export_request) +request_id <- pluck(request_id, "exportLogId") -#Check the status of the export request +# Check the status of the export request -export.request.status <- get_export_status(url=url, - username=username, - password=password, - request_id=request_id) +export_request_status <- get_export_status( + url = url, + username = username, + password = password, + request_id = request_id +) } } From dcae444105da4f8eaec17488b41fd33235e9b7d2 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Wed, 1 Feb 2023 16:17:10 +0000 Subject: [PATCH 058/203] added test for get_export_status (skipped by default) --- tests/testthat/test-get_export_status.R | 42 +++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 tests/testthat/test-get_export_status.R diff --git a/tests/testthat/test-get_export_status.R b/tests/testthat/test-get_export_status.R new file mode 100644 index 0000000..e27c1db --- /dev/null +++ b/tests/testthat/test-get_export_status.R @@ -0,0 +1,42 @@ +test_that("get_export_status works as expected", { + skip("get_export_status requires API call") + + api_call_request <- paste0( + url, "api/outbreaks/", outbreak_id, "/cases/export" + ) + + export_log_id_request <- httr::GET( + paste0( + api_call_request, + "?filter=%7B%22where%22%3A%7B%22useDbColumns%22%3A%22true%22%2C%20%22", + "dontTranslateValues%22%3A%22true%22%2C%20%22", + "jsonReplaceUndefinedWithNull%22%3A%22true%22%20%7D%7D", + "&type=", + "json", + "&access_token=", + get_access_token( + url = url, + username = username, + password = password + ) + ) + ) + + export_log_id_request_content <- httr::content(export_log_id_request) + + request_id <- purrr::pluck(export_log_id_request_content, "exportLogId") + + res <- get_export_status( + url = url, + username = username, + password = password, + request_id = request_id + ) + + expect_type(res, "list") + expect_length(res, 3) + expect_named(res, c("statusStep", "totalNo", "processedNo")) + expect_type(res$statusStep, "character") + expect_type(res$totalNo, "integer") + expect_type(res$processedNo, "integer") +}) From 1b5ccc966539b91886b6a61a2a615dcc086deeee Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Wed, 1 Feb 2023 16:31:45 +0000 Subject: [PATCH 059/203] linted get_all_outbreaks --- R/get_all_outbreaks.R | 37 ++++++++++++++++++++++++------------- 1 file changed, 24 insertions(+), 13 deletions(-) diff --git a/R/get_all_outbreaks.R b/R/get_all_outbreaks.R index ca98809..25b8faf 100644 --- a/R/get_all_outbreaks.R +++ b/R/get_all_outbreaks.R @@ -5,12 +5,15 @@ #' housekeeping function used in many of the #' other `godataR` functions. #' -#' @param url Insert the base URL for your instance of Go.Data here. Don't forget the forward slash "/" at end! +#' @param url Insert the base URL for your instance of Go.Data here. Don't +#' forget the forward slash "/" at end! #' @param username The email address for your Go.Data login. #' @param password The password for your Go.Data login #' #' @return -#' Returns data frame of outbreaks. The resulting list is filtered by the user's permissions: only outbreaks for which the user has access will be returned. +#' Returns data frame of outbreaks. The resulting list is filtered by the +#' user's permissions: only outbreaks for which the user has access will be +#' returned. #' @export #' @examples #' \dontrun{ @@ -29,18 +32,26 @@ #' @importFrom jsonlite fromJSON #' @importFrom purrr pluck #' @export +get_all_outbreaks <- function(url = url, + username = username, + password = password) { -get_all_outbreaks <- function(url=url, - username=username, - password=password) { - - outbreaks <- GET(paste0(url,"api/outbreaks", - "?access_token=",get_access_token(url=url, username=username, password=password))) %>% - content(as="text") %>% - fromJSON(flatten=TRUE) %>% - filter(deleted!=TRUE) %>% - select(any_of(c("id", "name", "description","createdBy","createdAt"))) + outbreaks <- GET( + paste0( + url, + "api/outbreaks", + "?access_token=", + get_access_token( + url = url, + username = username, + password = password + ) + ) + ) %>% + content(as = "text") %>% + fromJSON(flatten = TRUE) %>% + filter(.data$deleted != TRUE) %>% + select(any_of(c("id", "name", "description", "createdBy", "createdAt"))) return(outbreaks) - } From 19fc4ddeafd8eb7703bcffc34734ade1e71d4b06 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Wed, 1 Feb 2023 16:39:33 +0000 Subject: [PATCH 060/203] removed pipe from get_all_outbreaks --- R/get_all_outbreaks.R | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/R/get_all_outbreaks.R b/R/get_all_outbreaks.R index 25b8faf..2d64972 100644 --- a/R/get_all_outbreaks.R +++ b/R/get_all_outbreaks.R @@ -36,7 +36,7 @@ get_all_outbreaks <- function(url = url, username = username, password = password) { - outbreaks <- GET( + outbreaks_request <- GET( paste0( url, "api/outbreaks", @@ -47,11 +47,18 @@ get_all_outbreaks <- function(url = url, password = password ) ) - ) %>% - content(as = "text") %>% - fromJSON(flatten = TRUE) %>% - filter(.data$deleted != TRUE) %>% - select(any_of(c("id", "name", "description", "createdBy", "createdAt"))) + ) + + outbreaks_content <- content(outbreaks_request, as = "text") + + outbreaks <- fromJSON(outbreaks_content, flatten = TRUE) + + outbreaks <- filter(outbreaks, .data$deleted != TRUE) + + outbreaks <- select( + outbreaks, + any_of(c("id", "name", "description", "createdBy", "createdAt")) + ) return(outbreaks) } From 2b95a8a17981100a8cf5053d25858153a8f28109 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Wed, 1 Feb 2023 16:40:26 +0000 Subject: [PATCH 061/203] removed recursive argument defaults for get_all_outbreaks --- R/get_all_outbreaks.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/get_all_outbreaks.R b/R/get_all_outbreaks.R index 2d64972..b8253f8 100644 --- a/R/get_all_outbreaks.R +++ b/R/get_all_outbreaks.R @@ -32,9 +32,9 @@ #' @importFrom jsonlite fromJSON #' @importFrom purrr pluck #' @export -get_all_outbreaks <- function(url = url, - username = username, - password = password) { +get_all_outbreaks <- function(url, + username, + password) { outbreaks_request <- GET( paste0( From 64a7f12d9d3d8f31c017a97d20c52a0e72dd4e25 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Wed, 1 Feb 2023 16:42:18 +0000 Subject: [PATCH 062/203] used explicit namespace instead of import for get_all_outbreaks --- R/get_all_outbreaks.R | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/R/get_all_outbreaks.R b/R/get_all_outbreaks.R index b8253f8..9062f3e 100644 --- a/R/get_all_outbreaks.R +++ b/R/get_all_outbreaks.R @@ -25,18 +25,12 @@ #' username=username, #' password=password) #' } -#' @importFrom magrittr %>% -#' @import dplyr -#' @import tidyr -#' @import httr -#' @importFrom jsonlite fromJSON -#' @importFrom purrr pluck #' @export get_all_outbreaks <- function(url, username, password) { - outbreaks_request <- GET( + outbreaks_request <- httr::GET( paste0( url, "api/outbreaks", @@ -49,15 +43,15 @@ get_all_outbreaks <- function(url, ) ) - outbreaks_content <- content(outbreaks_request, as = "text") + outbreaks_content <- httr::content(outbreaks_request, as = "text") - outbreaks <- fromJSON(outbreaks_content, flatten = TRUE) + outbreaks <- jsonlite::fromJSON(outbreaks_content, flatten = TRUE) - outbreaks <- filter(outbreaks, .data$deleted != TRUE) + outbreaks <- dplyr::filter(outbreaks, .data$deleted != TRUE) - outbreaks <- select( + outbreaks <- dplyr::select( outbreaks, - any_of(c("id", "name", "description", "createdBy", "createdAt")) + dplyr::any_of(c("id", "name", "description", "createdBy", "createdAt")) ) return(outbreaks) From 552d11ddc18a541eab6ad2167f3a6cc59813df51 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Wed, 1 Feb 2023 16:45:25 +0000 Subject: [PATCH 063/203] updated get_all_outbreaks documentation and made return type a tibble --- R/get_all_outbreaks.R | 10 +++++++--- man/get_all_outbreaks.Rd | 17 +++++++++++------ 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/R/get_all_outbreaks.R b/R/get_all_outbreaks.R index 9062f3e..18c9639 100644 --- a/R/get_all_outbreaks.R +++ b/R/get_all_outbreaks.R @@ -21,9 +21,11 @@ #' username <- "myemail@email.com" #' password <- "mypassword" #' -#' outbreaks <- get_all_outbreaks(url=url, -#' username=username, -#' password=password) +#' outbreaks <- get_all_outbreaks( +#' url = url, +#' username = username, +#' password = password +#' ) #' } #' @export get_all_outbreaks <- function(url, @@ -54,5 +56,7 @@ get_all_outbreaks <- function(url, dplyr::any_of(c("id", "name", "description", "createdBy", "createdAt")) ) + outbreaks <- tibble::as_tibble(outbreaks) + return(outbreaks) } diff --git a/man/get_all_outbreaks.Rd b/man/get_all_outbreaks.Rd index 16e9ce3..3e0f6c6 100644 --- a/man/get_all_outbreaks.Rd +++ b/man/get_all_outbreaks.Rd @@ -4,17 +4,20 @@ \alias{get_all_outbreaks} \title{Get a list of all outbreaks and their attributes} \usage{ -get_all_outbreaks(url = url, username = username, password = password) +get_all_outbreaks(url, username, password) } \arguments{ -\item{url}{Insert the base URL for your instance of Go.Data here. Don't forget the forward slash "/" at end!} +\item{url}{Insert the base URL for your instance of Go.Data here. Don't +forget the forward slash "/" at end!} \item{username}{The email address for your Go.Data login.} \item{password}{The password for your Go.Data login} } \value{ -Returns data frame of outbreaks. The resulting list is filtered by the user's permissions: only outbreaks for which the user has access will be returned. +Returns data frame of outbreaks. The resulting list is filtered by the +user's permissions: only outbreaks for which the user has access will be +returned. } \description{ A function to retrieve all outbreaks assigned @@ -28,8 +31,10 @@ url <- "https://MyGoDataServer.com/" username <- "myemail@email.com" password <- "mypassword" -outbreaks <- get_all_outbreaks(url=url, - username=username, - password=password) +outbreaks <- get_all_outbreaks( + url = url, + username = username, + password = password +) } } From 475cd2f17c626d36efabc833926ee63cab5ec5f3 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Wed, 1 Feb 2023 17:01:43 +0000 Subject: [PATCH 064/203] added test for get_all_outbreaks (skipped by default) --- tests/testthat/test-get_all_outbreaks.R | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 tests/testthat/test-get_all_outbreaks.R diff --git a/tests/testthat/test-get_all_outbreaks.R b/tests/testthat/test-get_all_outbreaks.R new file mode 100644 index 0000000..af05858 --- /dev/null +++ b/tests/testthat/test-get_all_outbreaks.R @@ -0,0 +1,21 @@ +test_that("get_all_outbreaks works as expected", { + skip("get_all_outbreaks requires API call") + + res <- get_all_outbreaks( + url = url, + username = username, + password = password + ) + + expect_s3_class(res, "tbl_df") + expect_s3_class(res, "data.frame") + expect_identical(dim(res), c(10L, 5L)) + expect_identical( + colnames(res), + c("id", "name", "description", "createdBy", "createdAt" ) + ) + expect_identical( + unname(sapply(res[1, ], class)), + rep("character", 5) + ) +}) From bc0cc8576790aa84a0555302c3181b3e78243f90 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Thu, 2 Feb 2023 10:34:25 +0000 Subject: [PATCH 065/203] linted check_godata_url --- R/check_godata_url.R | 9 +++++---- man/check_godata_url.Rd | 3 ++- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/R/check_godata_url.R b/R/check_godata_url.R index 8a87f79..71d5ee8 100644 --- a/R/check_godata_url.R +++ b/R/check_godata_url.R @@ -4,7 +4,8 @@ #' is valid. This is a housekeeping function #' used in many of the other `godataR` functions. #' -#' @param url Insert the base URL for your instance of Go.Data here. Don't forget the forward slash "/" at end! +#' @param url Insert the base URL for your instance of Go.Data here. Don't +#' forget the forward slash "/" at end! #' #' @return #' Boolean, where `TRUE` indicates a valid URL. @@ -17,14 +18,14 @@ #' @import httr #' @importFrom purrr pluck #' @export -check_godata_url <- function(url=url) { +check_godata_url <- function(url = url) { # Get status code for version check - status_code <- GET(paste0(url,"api/system-settings/version")) %>% + status_code <- GET(paste0(url, "api/system-settings/version")) %>% pluck("status_code") # create boolean based on status code being 200 (success) - check <- (status_code==200) + check <- (status_code == 200) return(check) diff --git a/man/check_godata_url.Rd b/man/check_godata_url.Rd index 08c91e2..71e126c 100644 --- a/man/check_godata_url.Rd +++ b/man/check_godata_url.Rd @@ -7,7 +7,8 @@ check_godata_url(url = url) } \arguments{ -\item{url}{Insert the base URL for your instance of Go.Data here. Don't forget the forward slash "/" at end!} +\item{url}{Insert the base URL for your instance of Go.Data here. Don't +forget the forward slash "/" at end!} } \value{ Boolean, where \code{TRUE} indicates a valid URL. From ecb17e0b3750cf66099dd2eb03801db2ec3bf1f2 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Thu, 2 Feb 2023 10:34:56 +0000 Subject: [PATCH 066/203] removed recursive argument default for check_godata_url --- R/check_godata_url.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/check_godata_url.R b/R/check_godata_url.R index 71d5ee8..21a4354 100644 --- a/R/check_godata_url.R +++ b/R/check_godata_url.R @@ -18,7 +18,7 @@ #' @import httr #' @importFrom purrr pluck #' @export -check_godata_url <- function(url = url) { +check_godata_url <- function(url) { # Get status code for version check status_code <- GET(paste0(url, "api/system-settings/version")) %>% From cf14b9096cd586dfc71efbd18190f9af97c1b5d3 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Thu, 2 Feb 2023 10:35:55 +0000 Subject: [PATCH 067/203] removed pipe from check_godata_url --- R/check_godata_url.R | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/R/check_godata_url.R b/R/check_godata_url.R index 21a4354..74afc0a 100644 --- a/R/check_godata_url.R +++ b/R/check_godata_url.R @@ -21,8 +21,9 @@ check_godata_url <- function(url) { # Get status code for version check - status_code <- GET(paste0(url, "api/system-settings/version")) %>% - pluck("status_code") + status_code <- GET(paste0(url, "api/system-settings/version")) + + status_code <- pluck(status_code, "status_code") # create boolean based on status code being 200 (success) check <- (status_code == 200) From 99fdf298e4dbaa186623f9ea310ae313c1ed445f Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Thu, 2 Feb 2023 10:37:17 +0000 Subject: [PATCH 068/203] used explicit namespace instead of import for check_godata_url --- R/check_godata_url.R | 9 +++------ man/check_godata_url.Rd | 2 +- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/R/check_godata_url.R b/R/check_godata_url.R index 74afc0a..e81bc8e 100644 --- a/R/check_godata_url.R +++ b/R/check_godata_url.R @@ -14,19 +14,16 @@ #' url <- "https://MyGoDataServer.com/" #' check_godata_url(url=url) #' } -#' @importFrom magrittr %>% -#' @import httr -#' @importFrom purrr pluck #' @export check_godata_url <- function(url) { # Get status code for version check - status_code <- GET(paste0(url, "api/system-settings/version")) + status_code <- httr::GET(paste0(url, "api/system-settings/version")) - status_code <- pluck(status_code, "status_code") + status_code <- purrr::pluck(status_code, "status_code") # create boolean based on status code being 200 (success) - check <- (status_code == 200) + check <- status_code == 200 return(check) diff --git a/man/check_godata_url.Rd b/man/check_godata_url.Rd index 71e126c..71e02b5 100644 --- a/man/check_godata_url.Rd +++ b/man/check_godata_url.Rd @@ -4,7 +4,7 @@ \alias{check_godata_url} \title{Check if the provided Go.Data URL is valid} \usage{ -check_godata_url(url = url) +check_godata_url(url) } \arguments{ \item{url}{Insert the base URL for your instance of Go.Data here. Don't From c6c3cedccb219daac3f55a37741c4406317cca3c Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Thu, 2 Feb 2023 10:40:20 +0000 Subject: [PATCH 069/203] added test for check_godata_url (skipped by default) --- tests/testthat/test-check_godata_url.R | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 tests/testthat/test-check_godata_url.R diff --git a/tests/testthat/test-check_godata_url.R b/tests/testthat/test-check_godata_url.R new file mode 100644 index 0000000..80177f8 --- /dev/null +++ b/tests/testthat/test-check_godata_url.R @@ -0,0 +1,8 @@ +test_that("check_godata_url works as expected", { + skip("check_godata_url requires API call") + + res <- check_godata_url(url = url) + + expect_type(res, "logical") + expect_length(res, 1) +}) From b5934e320453e622c5d339a203fae702c48d9654 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Thu, 2 Feb 2023 10:41:03 +0000 Subject: [PATCH 070/203] updated check_godata_url documentation --- R/check_godata_url.R | 2 +- man/check_godata_url.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/check_godata_url.R b/R/check_godata_url.R index e81bc8e..f395640 100644 --- a/R/check_godata_url.R +++ b/R/check_godata_url.R @@ -12,7 +12,7 @@ #' @examples #' \dontrun{ #' url <- "https://MyGoDataServer.com/" -#' check_godata_url(url=url) +#' check_godata_url(url = url) #' } #' @export check_godata_url <- function(url) { diff --git a/man/check_godata_url.Rd b/man/check_godata_url.Rd index 71e02b5..f64d9ce 100644 --- a/man/check_godata_url.Rd +++ b/man/check_godata_url.Rd @@ -21,6 +21,6 @@ used in many of the other \code{godataR} functions. \examples{ \dontrun{ url <- "https://MyGoDataServer.com/" -check_godata_url(url=url) +check_godata_url(url = url) } } From 7cea7eb2c7b5b845722ce124211e8f6f03f001c2 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Thu, 2 Feb 2023 10:43:42 +0000 Subject: [PATCH 071/203] removed recursive default arguments for get_cases --- R/get_cases.R | 8 ++++---- man/get_cases.Rd | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/R/get_cases.R b/R/get_cases.R index d04f0c6..b6bbbd9 100644 --- a/R/get_cases.R +++ b/R/get_cases.R @@ -59,10 +59,10 @@ #' outbreak_id = outbreak_id #' ) #' } -get_cases <- function(url = url, - username = username, - password = password, - outbreak_id = outbreak_id, +get_cases <- function(url, + username, + password, + outbreak_id, method = c("export", "batch"), batch_size = 50000, wait = 2, diff --git a/man/get_cases.Rd b/man/get_cases.Rd index aa28bbd..f207f72 100644 --- a/man/get_cases.Rd +++ b/man/get_cases.Rd @@ -5,10 +5,10 @@ \title{Download cases from Go.Data} \usage{ get_cases( - url = url, - username = username, - password = password, - outbreak_id = outbreak_id, + url, + username, + password, + outbreak_id, method = c("export", "batch"), batch_size = 50000, wait = 2, From 588ad2c4bf8c3dfb96d012a40a59e32c6fb9742a Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Thu, 2 Feb 2023 10:55:43 +0000 Subject: [PATCH 072/203] linted get_clusters --- R/get_clusters.R | 99 ++++++++++++++++++++++++++++++++------------- man/get_clusters.Rd | 19 +++++---- 2 files changed, 83 insertions(+), 35 deletions(-) diff --git a/R/get_clusters.R b/R/get_clusters.R index fecc251..d7c9517 100644 --- a/R/get_clusters.R +++ b/R/get_clusters.R @@ -9,11 +9,14 @@ #' #' This function works on all versions of Go.Data. #' -#' @param url Insert the base URL for your instance of Go.Data here. Don't forget the forward slash "/" at end! +#' @param url Insert the base URL for your instance of Go.Data here. Don't +#' forget the forward slash "/" at end! #' @param username The email address for your Go.Data login. #' @param password The password for your Go.Data login -#' @param outbreak_id The id number for the outbreak for which you want to download clusters. -#' @param batch_size For large datasets, specify the number of records to retrieve in each iteration. +#' @param outbreak_id The id number for the outbreak for which you want to +#' download clusters. +#' @param batch_size For large datasets, specify the number of records to +#' retrieve in each iteration. #' #' @return #' Returns data frame. @@ -26,10 +29,12 @@ #' password <- "mypassword" #' outbreak_id <- "3b5554d7-2c19-41d0-b9af-475ad25a382b" #' -#' clusters <- get_clusters(url=url, -#' username=username, -#' password=password, -#' outbreak_id=outbreak_id) +#' clusters <- get_clusters( +#' url = url, +#' username = username, +#' password = password, +#' outbreak_id = outbreak_id +#' ) #' } #' @importFrom magrittr %>% #' @import dplyr @@ -39,55 +44,93 @@ #' @importFrom jsonlite fromJSON #' @importFrom purrr pluck -get_clusters <- function(url=url, - username=username, - password=password, - outbreak_id=outbreak_id, - batch_size=50000) { +get_clusters <- function(url = url, + username = username, + password = password, + outbreak_id = outbreak_id, + batch_size = 50000) { # no /export endpoint for clusters so no need to check version + outbreak_id_api <- get_active_outbreak( + url = url, + username = username, + password = password + ) #Check that outbreak_id is active - if (outbreak_id != get_active_outbreak(url=url, username=username, password=password)) { - set_active_outbreak(url=url, username=username, password=password, outbreak_id=outbreak_id) + if (outbreak_id != outbreak_id_api) { + set_active_outbreak( + url = url, + username = username, + password = password, + outbreak_id = outbreak_id + ) } #get total number of records - df_n <- GET(paste0(url,"api/outbreaks/",outbreak_id,"/clusters/count"), - add_headers(Authorization = paste("Bearer", get_access_token(url=url, username=username, password=password), sep = " "))) %>% - content(as="text") %>% - fromJSON(flatten=TRUE) %>% + df_n <- GET( + paste0(url, "api/outbreaks/", outbreak_id, "/clusters/count"), + add_headers(Authorization = paste("Bearer", get_access_token( + url = url, + username = username, + password = password + ), sep = " "))) %>% + content(as = "text") %>% + fromJSON(flatten = TRUE) %>% unlist() %>% unname() #Import records in batches df <- tibble() batch_size <- batch_size # number of records to import per iteration - skip <-0 + skip <- 0 message("****************************") #Download records in batches, and then append them into a single dataset while (skip < df_n) { #Progress message - if (df_n <= batch_size) message(paste0("...downloading records 1 to ",df_n)) - if (df_n > batch_size) message(paste0("...downloading records ", as.character(skip+1, scientific = FALSE), " to ", format(skip+batch_size, scientific = FALSE))) + if (df_n <= batch_size) { + message(paste0("...downloading records 1 to ", df_n)) + } + if (df_n > batch_size) { + message(paste0( + "...downloading records ", + as.character(skip + 1, scientific = FALSE), + " to ", + format(skip + batch_size, scientific = FALSE) + )) + } #fetch the batch of records - df.i <- GET(paste0(url,"api/outbreaks/",outbreak_id,"/clusters", - "/?filter={%22limit%22:",format(batch_size, scientific = FALSE),",%22skip%22:",format(skip, scientific = FALSE),"}"), - add_headers(Authorization = paste("Bearer", get_access_token(url=url, username=username, password=password), sep = " "))) %>% - content(as='text') %>% - fromJSON( flatten=TRUE) %>% + df_i <- GET( + paste0( + url, + "api/outbreaks/", + outbreak_id, + "/clusters", + "/?filter={%22limit%22:", + format(batch_size, scientific = FALSE), + ",%22skip%22:", + format(skip, scientific = FALSE), + "}" + ), + add_headers(Authorization = paste("Bearer", get_access_token( + url = url, + username = username, + password = password + ), sep = " "))) %>% + content(as = "text") %>% + fromJSON(flatten = TRUE) %>% as_tibble() #append the new batch of records to the existing data frame df <- df %>% - bind_rows(df.i) + bind_rows(df_i) #update numbers for the next iteration skip <- skip + batch_size - rm(df.i) + rm(df_i) } rm(batch_size, skip, df_n) diff --git a/man/get_clusters.Rd b/man/get_clusters.Rd index a629ae9..1f743c6 100644 --- a/man/get_clusters.Rd +++ b/man/get_clusters.Rd @@ -13,15 +13,18 @@ get_clusters( ) } \arguments{ -\item{url}{Insert the base URL for your instance of Go.Data here. Don't forget the forward slash "/" at end!} +\item{url}{Insert the base URL for your instance of Go.Data here. Don't +forget the forward slash "/" at end!} \item{username}{The email address for your Go.Data login.} \item{password}{The password for your Go.Data login} -\item{outbreak_id}{The id number for the outbreak for which you want to download clusters.} +\item{outbreak_id}{The id number for the outbreak for which you want to +download clusters.} -\item{batch_size}{For large datasets, specify the number of records to retrieve in each iteration.} +\item{batch_size}{For large datasets, specify the number of records to +retrieve in each iteration.} } \value{ Returns data frame. @@ -44,9 +47,11 @@ username <- "myemail@email.com" password <- "mypassword" outbreak_id <- "3b5554d7-2c19-41d0-b9af-475ad25a382b" -clusters <- get_clusters(url=url, - username=username, - password=password, - outbreak_id=outbreak_id) +clusters <- get_clusters( + url = url, + username = username, + password = password, + outbreak_id = outbreak_id +) } } From 45a7beaf0ff6a43f9d256f85fdec5e30d7f69a44 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Thu, 2 Feb 2023 10:56:19 +0000 Subject: [PATCH 073/203] removed recursive argument defaults for get_clusters --- R/get_clusters.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/get_clusters.R b/R/get_clusters.R index d7c9517..6c5bd4c 100644 --- a/R/get_clusters.R +++ b/R/get_clusters.R @@ -44,10 +44,10 @@ #' @importFrom jsonlite fromJSON #' @importFrom purrr pluck -get_clusters <- function(url = url, - username = username, - password = password, - outbreak_id = outbreak_id, +get_clusters <- function(url, + username, + password, + outbreak_id, batch_size = 50000) { # no /export endpoint for clusters so no need to check version From 996633810ea027079a9e92928ae6f927fa46584f Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Thu, 2 Feb 2023 11:00:56 +0000 Subject: [PATCH 074/203] removed pipe from get_clusters --- R/get_clusters.R | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/R/get_clusters.R b/R/get_clusters.R index 6c5bd4c..0a74339 100644 --- a/R/get_clusters.R +++ b/R/get_clusters.R @@ -68,17 +68,18 @@ get_clusters <- function(url, } #get total number of records - df_n <- GET( + df_n_request <- GET( paste0(url, "api/outbreaks/", outbreak_id, "/clusters/count"), add_headers(Authorization = paste("Bearer", get_access_token( url = url, username = username, password = password - ), sep = " "))) %>% - content(as = "text") %>% - fromJSON(flatten = TRUE) %>% - unlist() %>% - unname() + ), sep = " "))) + + df_n_content <- content(df_n_request, as = "text") + + df_n <- fromJSON(df_n_content, flatten = TRUE) + df_n <- unname(unlist(df_n)) #Import records in batches df <- tibble() @@ -103,7 +104,7 @@ get_clusters <- function(url, } #fetch the batch of records - df_i <- GET( + df_i_request <- GET( paste0( url, "api/outbreaks/", @@ -119,14 +120,17 @@ get_clusters <- function(url, url = url, username = username, password = password - ), sep = " "))) %>% - content(as = "text") %>% - fromJSON(flatten = TRUE) %>% - as_tibble() + ), sep = " ")) + ) + + df_i_content <- content(df_i_request, as = "text") + + df_i <- fromJSON(df_i_content, flatten = TRUE) + + df_i <- as_tibble(df_i) #append the new batch of records to the existing data frame - df <- df %>% - bind_rows(df_i) + df <- bind_rows(df, df_i) #update numbers for the next iteration skip <- skip + batch_size From 0d4f3ca520618f77b3c0bd3beb70b768d3e117bf Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Thu, 2 Feb 2023 11:04:15 +0000 Subject: [PATCH 075/203] used explicit namespace instead of import for get_clusters --- R/get_clusters.R | 30 +++++++++++------------------- man/get_clusters.Rd | 8 +------- 2 files changed, 12 insertions(+), 26 deletions(-) diff --git a/R/get_clusters.R b/R/get_clusters.R index 0a74339..dbbd548 100644 --- a/R/get_clusters.R +++ b/R/get_clusters.R @@ -36,14 +36,6 @@ #' outbreak_id = outbreak_id #' ) #' } -#' @importFrom magrittr %>% -#' @import dplyr -#' @import tidyr -#' @import httr -#' @import tibble -#' @importFrom jsonlite fromJSON -#' @importFrom purrr pluck - get_clusters <- function(url, username, password, @@ -68,21 +60,21 @@ get_clusters <- function(url, } #get total number of records - df_n_request <- GET( + df_n_request <- httr::GET( paste0(url, "api/outbreaks/", outbreak_id, "/clusters/count"), - add_headers(Authorization = paste("Bearer", get_access_token( + httr::add_headers(Authorization = paste("Bearer", get_access_token( url = url, username = username, password = password ), sep = " "))) - df_n_content <- content(df_n_request, as = "text") + df_n_content <- httr::content(df_n_request, as = "text") - df_n <- fromJSON(df_n_content, flatten = TRUE) + df_n <- jsonlite::fromJSON(df_n_content, flatten = TRUE) df_n <- unname(unlist(df_n)) #Import records in batches - df <- tibble() + df <- tibble::tibble() batch_size <- batch_size # number of records to import per iteration skip <- 0 message("****************************") @@ -104,7 +96,7 @@ get_clusters <- function(url, } #fetch the batch of records - df_i_request <- GET( + df_i_request <- httr::GET( paste0( url, "api/outbreaks/", @@ -116,21 +108,21 @@ get_clusters <- function(url, format(skip, scientific = FALSE), "}" ), - add_headers(Authorization = paste("Bearer", get_access_token( + httr::add_headers(Authorization = paste("Bearer", get_access_token( url = url, username = username, password = password ), sep = " ")) ) - df_i_content <- content(df_i_request, as = "text") + df_i_content <- httr::content(df_i_request, as = "text") - df_i <- fromJSON(df_i_content, flatten = TRUE) + df_i <- jsonlite::fromJSON(df_i_content, flatten = TRUE) - df_i <- as_tibble(df_i) + df_i <- tibble::as_tibble(df_i) #append the new batch of records to the existing data frame - df <- bind_rows(df, df_i) + df <- dplyr::bind_rows(df, df_i) #update numbers for the next iteration skip <- skip + batch_size diff --git a/man/get_clusters.Rd b/man/get_clusters.Rd index 1f743c6..550a938 100644 --- a/man/get_clusters.Rd +++ b/man/get_clusters.Rd @@ -4,13 +4,7 @@ \alias{get_clusters} \title{Download clusters from Go.Data (version agnostic)} \usage{ -get_clusters( - url = url, - username = username, - password = password, - outbreak_id = outbreak_id, - batch_size = 50000 -) +get_clusters(url, username, password, outbreak_id, batch_size = 50000) } \arguments{ \item{url}{Insert the base URL for your instance of Go.Data here. Don't From 0c9cb7b5088b9bfde8b2a115a6fcf67ad4476ffb Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Thu, 2 Feb 2023 11:42:07 +0000 Subject: [PATCH 076/203] added test for get_clusters --- tests/testthat/test-get_clusters.R | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 tests/testthat/test-get_clusters.R diff --git a/tests/testthat/test-get_clusters.R b/tests/testthat/test-get_clusters.R new file mode 100644 index 0000000..3155a80 --- /dev/null +++ b/tests/testthat/test-get_clusters.R @@ -0,0 +1,23 @@ +test_that("get_clusters works as expected", { + skip("get_clusters requires API call") + + res <- get_clusters( + url = url, + username = username, + password = password, + outbreak_id = outbreak_id + ) + + expect_s3_class(res, "tbl_df") + expect_s3_class(res, "data.frame") + expect_identical(dim(res), c(1L, 11L)) + expect_identical( + colnames(res), + c("name", "description", "icon", "colorCode", "id", "createdAt", + "createdBy", "updatedAt", "updatedBy", "createdOn", "deleted") + ) + expect_identical( + unname(sapply(res[1, ], class)), + c(rep("character", 10), "logical") + ) +}) From a8fe768d61e551c981a11532587e6f490b129216 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Thu, 2 Feb 2023 11:56:16 +0000 Subject: [PATCH 077/203] linted set_active_outbreak --- R/set_active_outbreak.R | 87 ++++++++++++++++++++++++++++---------- man/set_active_outbreak.Rd | 3 +- 2 files changed, 66 insertions(+), 24 deletions(-) diff --git a/R/set_active_outbreak.R b/R/set_active_outbreak.R index 530dcd7..960067c 100644 --- a/R/set_active_outbreak.R +++ b/R/set_active_outbreak.R @@ -8,7 +8,8 @@ #' Each Go.Data user can have 1 and only 1 active #' outbreak at a given time. #' -#' @param url Insert the base URL for your instance of Go.Data here. Don't forget the forward slash "/" at end! +#' @param url Insert the base URL for your instance of Go.Data here. Don't +#' forget the forward slash "/" at end! #' @param username The email address for your Go.Data login. #' @param password The password for your Go.Data login #' @param outbreak_id The id number for the outbreak to set to active. @@ -34,37 +35,77 @@ #' @import httr #' @importFrom jsonlite fromJSON -set_active_outbreak <- function(url=url, - username=username, - password=password, - outbreak_id=outbreak_id) { +set_active_outbreak <- function(url = url, + username = username, + password = password, + outbreak_id = outbreak_id) { #Get User ID & Active Outbreak ID - user.details <- GET(paste0(url,"api/users", - "?access_token=",get_access_token(url=url, username=username, password=password))) %>% - content(as="text") %>% - fromJSON(flatten=TRUE) %>% - filter(email==username) + user_details <- GET( + paste0( + url, + "api/users", + "?access_token=", + get_access_token( + url = url, + username = username, + password = password + ) + ) + ) %>% + content(as = "text") %>% + fromJSON(flatten = TRUE) %>% + filter(.data$email == username) - current.active.outbreak <- user.details$activeOutbreakId - user.id <- user.details$id + current_active_outbreak <- user_details$activeOutbreakId + user_id <- user_details$id #Get List of Available Outbreak IDs - available.outbreaks <- get_all_outbreaks(url=url, username=username, password=password) %>% - select(id) %>% unlist() + available_outbreaks <- get_all_outbreaks( + url = url, + username = username, + password = password + ) %>% + select(id) %>% + unlist() - if (current.active.outbreak == outbreak_id) { #Is outbreak_id already active? - text <- paste0("Active outbreak not changed. ", outbreak_id, " is already active.") - } else if (!(outbreak_id %in% available.outbreaks)) { - stop(paste0("Active outbreak not changed. ",outbreak_id, " not in list of user's available outbreaks. Make sure the id number is correct & that the user has proper access.")) + if (current_active_outbreak == outbreak_id) { # Is outbreak_id already active? + text <- paste0( + "Active outbreak not changed. ", + outbreak_id, + " is already active." + ) + } else if (!(outbreak_id %in% available_outbreaks)) { + stop(paste0( + "Active outbreak not changed. ", + outbreak_id, + " not in list of user's available outbreaks. Make sure the id number is", + " correct & that the user has proper access." + )) } else { - new.data <- list("activeOutbreakId"=outbreak_id) - patch.active.outbreak <- PATCH(paste0(url,"api/users/",user.id), - add_headers(Authorization = paste("Bearer", get_access_token(url=url, username=username, password=password), sep = " ")), - body=new.data, - encode="json") + new_data <- list("activeOutbreakId" = outbreak_id) + patch_active_outbreak <- PATCH( + paste0( + url, + "api/users/", + user_id + ), + add_headers( + Authorization = paste( + "Bearer", + get_access_token( + url = url, + username = username, + password = password + ), + sep = " " + ) + ), + body = new_data, + encode = "json" + ) text <- paste0("Active outbreak changed! ", outbreak_id, " is now active.") } diff --git a/man/set_active_outbreak.Rd b/man/set_active_outbreak.Rd index 8ffbc8a..9bc8d0f 100644 --- a/man/set_active_outbreak.Rd +++ b/man/set_active_outbreak.Rd @@ -12,7 +12,8 @@ set_active_outbreak( ) } \arguments{ -\item{url}{Insert the base URL for your instance of Go.Data here. Don't forget the forward slash "/" at end!} +\item{url}{Insert the base URL for your instance of Go.Data here. Don't +forget the forward slash "/" at end!} \item{username}{The email address for your Go.Data login.} From 270e43061ee6f7354e0e4d5433032fe7429176fb Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Thu, 2 Feb 2023 11:56:59 +0000 Subject: [PATCH 078/203] removed recursive argument defaults from set_active_outbreak --- R/set_active_outbreak.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/set_active_outbreak.R b/R/set_active_outbreak.R index 960067c..52bbbe9 100644 --- a/R/set_active_outbreak.R +++ b/R/set_active_outbreak.R @@ -35,10 +35,10 @@ #' @import httr #' @importFrom jsonlite fromJSON -set_active_outbreak <- function(url = url, - username = username, - password = password, - outbreak_id = outbreak_id) { +set_active_outbreak <- function(url, + username, + password, + outbreak_id) { #Get User ID & Active Outbreak ID From a52f48f7931fee08537941113e0bfd72c3c50f68 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Thu, 2 Feb 2023 12:00:02 +0000 Subject: [PATCH 079/203] removed pipe from set_active_outbreak --- R/set_active_outbreak.R | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/R/set_active_outbreak.R b/R/set_active_outbreak.R index 52bbbe9..56187bd 100644 --- a/R/set_active_outbreak.R +++ b/R/set_active_outbreak.R @@ -42,7 +42,7 @@ set_active_outbreak <- function(url, #Get User ID & Active Outbreak ID - user_details <- GET( + user_details_request <- GET( paste0( url, "api/users", @@ -53,10 +53,13 @@ set_active_outbreak <- function(url, password = password ) ) - ) %>% - content(as = "text") %>% - fromJSON(flatten = TRUE) %>% - filter(.data$email == username) + ) + + user_details_content <- content(user_details_request, as = "text") + + user_details <- fromJSON(user_details_content, flatten = TRUE) + + user_details <- filter(user_details, .data$email == username) current_active_outbreak <- user_details$activeOutbreakId user_id <- user_details$id @@ -66,9 +69,10 @@ set_active_outbreak <- function(url, url = url, username = username, password = password - ) %>% - select(id) %>% - unlist() + ) + + available_outbreaks <- unlist(select(available_outbreaks, id)) + if (current_active_outbreak == outbreak_id) { # Is outbreak_id already active? text <- paste0( From 77d87db72b1225653a990809d39c68c1ee96f0ae Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Thu, 2 Feb 2023 12:03:33 +0000 Subject: [PATCH 080/203] used explicit namespace instead of import for set_active_outbreak --- R/set_active_outbreak.R | 30 +++++++++++++----------------- man/set_active_outbreak.Rd | 17 +++++++---------- 2 files changed, 20 insertions(+), 27 deletions(-) diff --git a/R/set_active_outbreak.R b/R/set_active_outbreak.R index 56187bd..9a602f7 100644 --- a/R/set_active_outbreak.R +++ b/R/set_active_outbreak.R @@ -24,17 +24,13 @@ #' password <- "mypassword" #' outbreak_id <- "3b5554d7-2c19-41d0-b9af-475ad25a382b" #' -#' set_active_outbreak(url=url, -#' username=username, -#' password=password, -#' outbreak_id=outbreak_id) +#' set_active_outbreak( +#' url = url, +#' username = username, +#' password = password, +#' outbreak_id = outbreak_id +#' ) #' } -#' @importFrom magrittr %>% -#' @import dplyr -#' @import tidyr -#' @import httr -#' @importFrom jsonlite fromJSON - set_active_outbreak <- function(url, username, password, @@ -42,7 +38,7 @@ set_active_outbreak <- function(url, #Get User ID & Active Outbreak ID - user_details_request <- GET( + user_details_request <- httr::GET( paste0( url, "api/users", @@ -55,11 +51,11 @@ set_active_outbreak <- function(url, ) ) - user_details_content <- content(user_details_request, as = "text") + user_details_content <- httr::content(user_details_request, as = "text") - user_details <- fromJSON(user_details_content, flatten = TRUE) + user_details <- jsonlite::fromJSON(user_details_content, flatten = TRUE) - user_details <- filter(user_details, .data$email == username) + user_details <- dplyr::filter(user_details, .data$email == username) current_active_outbreak <- user_details$activeOutbreakId user_id <- user_details$id @@ -71,7 +67,7 @@ set_active_outbreak <- function(url, password = password ) - available_outbreaks <- unlist(select(available_outbreaks, id)) + available_outbreaks <- unlist(dplyr::select(available_outbreaks, id)) if (current_active_outbreak == outbreak_id) { # Is outbreak_id already active? @@ -90,13 +86,13 @@ set_active_outbreak <- function(url, } else { new_data <- list("activeOutbreakId" = outbreak_id) - patch_active_outbreak <- PATCH( + patch_active_outbreak <- httr::PATCH( paste0( url, "api/users/", user_id ), - add_headers( + httr::add_headers( Authorization = paste( "Bearer", get_access_token( diff --git a/man/set_active_outbreak.Rd b/man/set_active_outbreak.Rd index 9bc8d0f..549b038 100644 --- a/man/set_active_outbreak.Rd +++ b/man/set_active_outbreak.Rd @@ -4,12 +4,7 @@ \alias{set_active_outbreak} \title{Change the currently active outbreak} \usage{ -set_active_outbreak( - url = url, - username = username, - password = password, - outbreak_id = outbreak_id -) +set_active_outbreak(url, username, password, outbreak_id) } \arguments{ \item{url}{Insert the base URL for your instance of Go.Data here. Don't @@ -41,9 +36,11 @@ username <- "myemail@email.com" password <- "mypassword" outbreak_id <- "3b5554d7-2c19-41d0-b9af-475ad25a382b" -set_active_outbreak(url=url, - username=username, - password=password, - outbreak_id=outbreak_id) +set_active_outbreak( + url = url, + username = username, + password = password, + outbreak_id = outbreak_id +) } } From 5f56dcfd4b4ceadfcf9063734c0cb08c5b9dc016 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Thu, 2 Feb 2023 13:38:25 +0000 Subject: [PATCH 081/203] set_active_outbreak now returns outbreak_id invisibly --- R/set_active_outbreak.R | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/R/set_active_outbreak.R b/R/set_active_outbreak.R index 9a602f7..1b112d2 100644 --- a/R/set_active_outbreak.R +++ b/R/set_active_outbreak.R @@ -36,7 +36,6 @@ set_active_outbreak <- function(url, password, outbreak_id) { - #Get User ID & Active Outbreak ID user_details_request <- httr::GET( paste0( @@ -111,6 +110,6 @@ set_active_outbreak <- function(url, message(text) - + invisible(outbreak_id) } From 2eaec65d9a92c724e79b2ebc193781e36006db64 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Thu, 2 Feb 2023 13:43:04 +0000 Subject: [PATCH 082/203] added tests for set_active_outbreak --- tests/testthat/test-set_active_outbreak.R | 32 +++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 tests/testthat/test-set_active_outbreak.R diff --git a/tests/testthat/test-set_active_outbreak.R b/tests/testthat/test-set_active_outbreak.R new file mode 100644 index 0000000..8a3ec4c --- /dev/null +++ b/tests/testthat/test-set_active_outbreak.R @@ -0,0 +1,32 @@ +test_that("set_active_outbreak works as expected", { + skip("set_active_outbreak requires API call") + + expect_message( + res <- set_active_outbreak( + url = url, + username = username, + password = password, + outbreak_id = outbreak_id + ), + regexp = "Active outbreak not changed." + ) + + expect_type(res, "character") +}) + +test_that("set_active_outbreak works as expected with non-valid outbreak id", { + skip("set_active_outbreak requires API call") + + expect_error( + set_active_outbreak( + url = url, + username = username, + password = password, + outbreak_id = "123" + ), + regexp = paste0( + "Active outbreak not changed. 123 not in list of user's ", + "available outbreaks" + ) + ) +}) From 7abe81faae1fef291e4d425223112468e880872e Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Thu, 2 Feb 2023 14:07:51 +0000 Subject: [PATCH 083/203] linted get_contacts --- R/get_contacts.R | 116 ++++++++++++++++++++++++++++---------------- man/get_contacts.Rd | 34 ++++++++----- 2 files changed, 96 insertions(+), 54 deletions(-) diff --git a/R/get_contacts.R b/R/get_contacts.R index c79b1e3..1a686e2 100644 --- a/R/get_contacts.R +++ b/R/get_contacts.R @@ -6,33 +6,43 @@ #' This function works on all versions of Go.Data. There #' are two methods for downloading the data: #' -#' `method="batches"` will work on all versions of +#' `method = "batches"` will work on all versions of #' Go.Data. This method relies on the GET outbreak/{id}/contacts #' API endpoint. Records are then retrieved in batches #' based on `batch_size` and appended together into -#' a final dataset. `method="batches"` will be the default and +#' a final dataset. `method = "batches"` will be the default and #' only available method for Go.Data version 2.38.0 or older. #' -#' `method="export"` will only work on Go.Data versions +#' `method = "export"` will only work on Go.Data versions #' 2.38.1 or newer. This method relies on the GET #' outbreak/{id}/contacts/export API endpoint. An export #' request is submitted to the server, and then when the #' export is ready, it will be downloaded. Due to better -#' performance and more options, `method="export"` will +#' performance and more options, `method = "export"` will #' be the default if you are using Go.Data version 2.38.1 #' or newer. #' -#' @param url Insert the base URL for your instance of Go.Data here. Don't forget the forward slash "/" at end! +#' @param url Insert the base URL for your instance of Go.Data here. Don't +#' forget the forward slash "/" at end! #' @param username The email address for your Go.Data login. #' @param password The password for your Go.Data login -#' @param outbreak_id The id number for the outbreak for which you want to download data. -#' @param method The method to download data. `method="export"` is the preferred and default method for Go.Data version 2.38.1 or later. See Details. -#' @param batch_size If `method="batches"`, then `batch_size` specifies the number of records to retrieve in each iteration. -#' @param wait If `method="export"`, then `wait` is the number of seconds to wait in between iterations of checking the status of the export. -#' @param file.type If `method="export"`, then `file.type` determines Whether the resulting data frame should contain nested fields (`file.type="json"`, the default) or an entirely flat data structure (`file.type="csv"`) +#' @param outbreak_id The id number for the outbreak for which you want to +#' download data. +#' @param method The method to download data. `method="export"` is the +#' preferred and default method for Go.Data version 2.38.1 or later. See +#' Details. +#' @param batch_size If `method = "batches"`, then `batch_size` specifies the +#' number of records to retrieve in each iteration. +#' @param wait If `method = "export"`, then `wait` is the number of seconds to +#' wait in between iterations of checking the status of the export. +#' @param file_type If `method="export"`, then `file_type` determines Whether +#' the resulting data frame should contain nested fields (`file_type = "json"`, +#' the default) or an entirely flat data structure (`file_type = "csv"`) #' #' @return -#' Returns a data frame. Some fields, such as addresses, hospitalization history, and questionnaire fields may require further unnesting. See \code{\link[tidyr]{nest}} for assitance with unnesting. +#' Returns a data frame. Some fields, such as addresses, hospitalization +#' history, and questionnaire fields may require further unnesting. See +#' \code{\link[tidyr]{nest}} for assitance with unnesting. #' @export #' #' @examples @@ -56,23 +66,33 @@ #' @importFrom purrr pluck -get_contacts <- function(url=url, - username=username, - password=password, - outbreak_id=outbreak_id, - method=c("export","batch"), - batch_size=50000, - wait=2, - file.type=c("json","csv")) { +get_contacts <- function(url = url, + username = username, + password = password, + outbreak_id = outbreak_id, + method = c("export", "batch"), + batch_size = 50000, + wait = 2, + file_type = c("json", "csv")) { + active_outbreak <- get_active_outbreak( + url = url, + username = username, + password = password + ) #Check that outbreak_id is active - if (outbreak_id != get_active_outbreak(url=url, username=username, password=password)) { - set_active_outbreak(url=url, username=username, password=password, outbreak_id=outbreak_id) + if (outbreak_id != active_outbreak) { + set_active_outbreak( + url = url, + username = username, + password = password, + outbreak_id = outbreak_id + ) } #Set default method based on current version of Go.Data - version.check <- check_godata_version(url=url) - if (!version.check) { + version_check <- check_godata_version(url = url) + if (!version_check) { method <- "batches" #Older version of Go.Data can only use the batch method } else if (missing(method)) { method <- "export" # For new versions of Go.Data, default to export method @@ -81,30 +101,42 @@ get_contacts <- function(url=url, if (method == "batches") { - api_call_n <- paste0(url, "api/outbreaks/",outbreak_id,"/contacts/filtered-count") - api_call_get <- paste0(url, "api/outbreaks/",outbreak_id,"/contacts") - df <- batch_downloader(url=url, - username=username, - password=password, - api_call_n=api_call_n, - api_call_get=api_call_get, - batch_size=batch_size) + api_call_n <- paste0( + url, + "api/outbreaks/", + outbreak_id, + "/contacts/filtered-count" + ) + api_call_get <- paste0(url, "api/outbreaks/", outbreak_id, "/contacts") + df <- batch_downloader( + url = url, + username = username, + password = password, + api_call_n = api_call_n, + api_call_get = api_call_get, + batch_size = batch_size + ) } else if (method == "export") { - #Default value of file.type is "json" - if (missing(file.type)) file.type <- "json" + #Default value of file_type is "json" + if (missing(file_type)) file_type <- "json" #Submit an export request to the system - api_call_request <- paste0(url,"api/outbreaks/",outbreak_id,"/contacts/export") - df <- export_downloader(url=url, - username=username, - password=password, - api_call_request=api_call_request, - file.type = file.type, - wait = wait) - - + api_call_request <- paste0( + url, + "api/outbreaks/", + outbreak_id, + "/contacts/export" + ) + df <- export_downloader( + url = url, + username = username, + password = password, + api_call_request = api_call_request, + file_type = file_type, + wait = wait + ) } return(df) diff --git a/man/get_contacts.Rd b/man/get_contacts.Rd index 68817ca..6bc714a 100644 --- a/man/get_contacts.Rd +++ b/man/get_contacts.Rd @@ -12,28 +12,38 @@ get_contacts( method = c("export", "batch"), batch_size = 50000, wait = 2, - file.type = c("json", "csv") + file_type = c("json", "csv") ) } \arguments{ -\item{url}{Insert the base URL for your instance of Go.Data here. Don't forget the forward slash "/" at end!} +\item{url}{Insert the base URL for your instance of Go.Data here. Don't +forget the forward slash "/" at end!} \item{username}{The email address for your Go.Data login.} \item{password}{The password for your Go.Data login} -\item{outbreak_id}{The id number for the outbreak for which you want to download data.} +\item{outbreak_id}{The id number for the outbreak for which you want to +download data.} -\item{method}{The method to download data. \code{method="export"} is the preferred and default method for Go.Data version 2.38.1 or later. See Details.} +\item{method}{The method to download data. \code{method="export"} is the +preferred and default method for Go.Data version 2.38.1 or later. See +Details.} -\item{batch_size}{If \code{method="batches"}, then \code{batch_size} specifies the number of records to retrieve in each iteration.} +\item{batch_size}{If \code{method = "batches"}, then \code{batch_size} specifies the +number of records to retrieve in each iteration.} -\item{wait}{If \code{method="export"}, then \code{wait} is the number of seconds to wait in between iterations of checking the status of the export.} +\item{wait}{If \code{method = "export"}, then \code{wait} is the number of seconds to +wait in between iterations of checking the status of the export.} -\item{file.type}{If \code{method="export"}, then \code{file.type} determines Whether the resulting data frame should contain nested fields (\code{file.type="json"}, the default) or an entirely flat data structure (\code{file.type="csv"})} +\item{file_type}{If \code{method="export"}, then \code{file_type} determines Whether +the resulting data frame should contain nested fields (\code{file_type = "json"}, +the default) or an entirely flat data structure (\code{file_type = "csv"})} } \value{ -Returns a data frame. Some fields, such as addresses, hospitalization history, and questionnaire fields may require further unnesting. See \code{\link[tidyr]{nest}} for assitance with unnesting. +Returns a data frame. Some fields, such as addresses, hospitalization +history, and questionnaire fields may require further unnesting. See +\code{\link[tidyr]{nest}} for assitance with unnesting. } \description{ A function to retrieve the contact data for a @@ -43,19 +53,19 @@ specific \code{outbreak_id}. This function works on all versions of Go.Data. There are two methods for downloading the data: -\code{method="batches"} will work on all versions of +\code{method = "batches"} will work on all versions of Go.Data. This method relies on the GET outbreak/{id}/contacts API endpoint. Records are then retrieved in batches based on \code{batch_size} and appended together into -a final dataset. \code{method="batches"} will be the default and +a final dataset. \code{method = "batches"} will be the default and only available method for Go.Data version 2.38.0 or older. -\code{method="export"} will only work on Go.Data versions +\code{method = "export"} will only work on Go.Data versions 2.38.1 or newer. This method relies on the GET outbreak/{id}/contacts/export API endpoint. An export request is submitted to the server, and then when the export is ready, it will be downloaded. Due to better -performance and more options, \code{method="export"} will +performance and more options, \code{method = "export"} will be the default if you are using Go.Data version 2.38.1 or newer. } From cf5c17b2909a0c5641efded05a961a02743e81e2 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Thu, 2 Feb 2023 14:09:31 +0000 Subject: [PATCH 084/203] removed recursive argument defaults for get_contacts --- R/get_contacts.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/get_contacts.R b/R/get_contacts.R index 1a686e2..e5d6a91 100644 --- a/R/get_contacts.R +++ b/R/get_contacts.R @@ -66,10 +66,10 @@ #' @importFrom purrr pluck -get_contacts <- function(url = url, - username = username, - password = password, - outbreak_id = outbreak_id, +get_contacts <- function(url, + username, + password, + outbreak_id, method = c("export", "batch"), batch_size = 50000, wait = 2, From dadff767b1001e421e681b7c60b2a22e8cdba49a Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Thu, 2 Feb 2023 14:13:14 +0000 Subject: [PATCH 085/203] removed imports from get_contacts --- R/get_contacts.R | 19 ++++++------------- man/get_contacts.Rd | 18 ++++++++++-------- 2 files changed, 16 insertions(+), 21 deletions(-) diff --git a/R/get_contacts.R b/R/get_contacts.R index e5d6a91..1ae54bb 100644 --- a/R/get_contacts.R +++ b/R/get_contacts.R @@ -52,20 +52,13 @@ #' password <- "mypassword" #' outbreak_id <- "3b5554d7-2c19-41d0-b9af-475ad25a382b" #' -#' contacts <- get_contacts(url=url, -#' username=username, -#' password=password, -#' outbreak_id=outbreak_id) +#' contacts <- get_contacts( +#' url = url, +#' username = username, +#' password = password, +#' outbreak_id=outbreak_id +#' ) #' } -#' @importFrom magrittr %>% -#' @import dplyr -#' @import tidyr -#' @import httr -#' @import tibble -#' @importFrom jsonlite fromJSON -#' @importFrom purrr pluck - - get_contacts <- function(url, username, password, diff --git a/man/get_contacts.Rd b/man/get_contacts.Rd index 6bc714a..3e9389e 100644 --- a/man/get_contacts.Rd +++ b/man/get_contacts.Rd @@ -5,10 +5,10 @@ \title{Download contacts from Go.Data} \usage{ get_contacts( - url = url, - username = username, - password = password, - outbreak_id = outbreak_id, + url, + username, + password, + outbreak_id, method = c("export", "batch"), batch_size = 50000, wait = 2, @@ -76,9 +76,11 @@ username <- "myemail@email.com" password <- "mypassword" outbreak_id <- "3b5554d7-2c19-41d0-b9af-475ad25a382b" -contacts <- get_contacts(url=url, - username=username, - password=password, - outbreak_id=outbreak_id) +contacts <- get_contacts( + url = url, + username = username, + password = password, + outbreak_id=outbreak_id +) } } From 28e64bdea5f52f1dd4f50a77a6dac04bad29b061 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Thu, 2 Feb 2023 14:29:35 +0000 Subject: [PATCH 086/203] added test for get_contacts --- tests/testthat/test-get_contacts.R | 55 ++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 tests/testthat/test-get_contacts.R diff --git a/tests/testthat/test-get_contacts.R b/tests/testthat/test-get_contacts.R new file mode 100644 index 0000000..0df14a6 --- /dev/null +++ b/tests/testthat/test-get_contacts.R @@ -0,0 +1,55 @@ +test_that("get_contacts works as expected", { + skip("get_contacts requires API call") + + res <- get_contacts( + url = url, + username = username, + password = password, + outbreak_id = outbreak_id + ) + + expect_s3_class(res, "tbl_df") + expect_s3_class(res, "data.frame") + expect_identical(dim(res), c(14L, 64L)) + expect_identical( + colnames(res), + c("id", "visualId", "dateOfReporting", "isDateOfReportingApproximate", + "createdAt", "createdBy", "updatedAt", "updatedBy", "deleted", + "deletedAt", "createdOn", "firstName", "middleName", "lastName", "gender", + "occupation", "dob", "classification", "wasContact", "wasCase", + "dateBecomeContact", "riskLevel", "riskReason", "outcomeId", + "dateOfOutcome", "documents", "type", "transferRefused", "addresses", + "safeBurial", "dateOfBurial", "followUpTeamId", "dateOfLastContact", + "numberOfExposures", "numberOfContacts", "vaccinesReceived", + "pregnancyStatus", "responsibleUserId", "age.years", "age.months", + "followUp.originalStartDate", "followUp.startDate", "followUp.endDate", + "followUp.status", "relationship.contactDate", + "relationship.contactDateEstimated", "relationship.certaintyLevelId", + "relationship.createdAt", "relationship.createdBy", + "relationship.updatedAt", "relationship.updatedBy", + "relationship.createdOn", "relationship.deleted", + "relationship.relatedId", "relationship.id", + "relationship.exposureTypeId", "relationship.exposureFrequencyId", + "relationship.exposureDurationId", + "relationship.socialRelationshipTypeId", + "relationship.socialRelationshipDetail", "relationship.clusterId", + "relationship.comment", "relationship.deletedAt", + "questionnaireAnswers.test") + ) + + expect_identical( + unname(sapply(res[1, ], class)), + c("character", "character", "character", "logical", "character", + "character", "character", "character", "logical", "logical", "character", + "character", "logical", "character", "character", "character", "logical", + "character", "logical", "logical", "character", "character", "character", + "logical", "logical", "list", "character", "logical", "list", "logical", + "logical", "character", "character", "integer", "integer", "list", + "logical", "character", "integer", "integer", "character", "character", + "character", "character", "character", "logical", "character", + "character", "character", "character", "character", "character", + "logical", "character", "character", "character", "character", + "character", "character", "character", "logical", "logical", "logical", + "list") + ) +}) From 58d4309bd6d0683f26c937d22b6930824c9461ad Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Thu, 2 Feb 2023 14:30:44 +0000 Subject: [PATCH 087/203] ensure contacts are returned as tibble in get_contacts --- R/get_contacts.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/R/get_contacts.R b/R/get_contacts.R index 1ae54bb..3095f00 100644 --- a/R/get_contacts.R +++ b/R/get_contacts.R @@ -132,5 +132,7 @@ get_contacts <- function(url, ) } + df <- tibble::as_tibble(df) + return(df) } From 98796a17648340390f188b2135e80e62fcbea482 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Thu, 2 Feb 2023 14:43:46 +0000 Subject: [PATCH 088/203] linted get_contacts_of_contacts --- R/get_contacts_of_contacts.R | 131 +++++++++++++++++++++----------- man/get_contacts_of_contacts.Rd | 44 +++++++---- 2 files changed, 113 insertions(+), 62 deletions(-) diff --git a/R/get_contacts_of_contacts.R b/R/get_contacts_of_contacts.R index 7459ffb..c97a513 100644 --- a/R/get_contacts_of_contacts.R +++ b/R/get_contacts_of_contacts.R @@ -6,34 +6,44 @@ #' This function works on all versions of Go.Data. There #' are two methods for downloading the data: #' -#' `method="batches"` will work on all versions of +#' `method = "batches"` will work on all versions of #' Go.Data. This method relies on the GET #' outbreak/{id}/contacts-of-contacts API endpoint. #' Records are then retrieved in batches based on #' `batch_size` and appended together into a final -#' dataset. `method="batches"` will be the default and +#' dataset. `method = "batches"` will be the default and #' only available method for Go.Data version 2.38.0 or older. #' -#' `method="export"` will only work on Go.Data versions +#' `method = "export"` will only work on Go.Data versions #' 2.38.1 or newer. This method relies on the GET #' outbreak/{id}/contacts-of-contacts/export API endpoint. #' An export request is submitted to the server, and then #' when the export is ready, it will be downloaded. Due to -#' better performance and more options, `method="export"` will +#' better performance and more options, `method = "export"` will #' be the default if you are using Go.Data version 2.38.1 #' or newer. #' -#' @param url Insert the base URL for your instance of Go.Data here. Don't forget the forward slash "/" at end! +#' @param url Insert the base URL for your instance of Go.Data here. Don't +#' forget the forward slash "/" at end! #' @param username The email address for your Go.Data login. #' @param password The password for your Go.Data login -#' @param outbreak_id The id number for the outbreak for which you want to download data. -#' @param method The method to download data. `method="export"` is the preferred and default method for Go.Data version 2.38.1 or later. See Details. -#' @param batch_size If `method="batches"`, then `batch_size` specifies the number of records to retrieve in each iteration. -#' @param wait If `method="export"`, then `wait` is the number of seconds to wait in between iterations of checking the status of the export. -#' @param file.type If `method="export"`, then `file.type` determines Whether the resulting data frame should contain nested fields (`file.type="json"`, the default) or an entirely flat data structure (`file.type="csv"`) +#' @param outbreak_id The id number for the outbreak for which you want to +#' download data. +#' @param method The method to download data. `method = "export"` is the +#' preferred and default method for Go.Data version 2.38.1 or later. See +#' Details. +#' @param batch_size If `method = "batches"`, then `batch_size` specifies the +#' number of records to retrieve in each iteration. +#' @param wait If `method = "export"`, then `wait` is the number of seconds to +#' wait in between iterations of checking the status of the export. +#' @param file_type If `method = "export"`, then `file_type` determines Whether +#' the resulting data frame should contain nested fields (`file_type = "json"`, +#' the default) or an entirely flat data structure (`file_type = "csv"`) #' #' @return -#' Returns a data frame. Some fields, such as addresses, hospitalization history, and questionnaire fields may require further unnesting. See \code{\link[tidyr]{nest}} for assitance with unnesting. +#' Returns a data frame. Some fields, such as addresses, hospitalization +#' history, and questionnaire fields may require further unnesting. See +#' \code{\link[tidyr]{nest}} for assitance with unnesting. #' @export #' #' @examples @@ -43,10 +53,12 @@ #' password <- "mypassword" #' outbreak_id <- "3b5554d7-2c19-41d0-b9af-475ad25a382b" #' -#' contacts_of_contacts <- get_contacts_of_contacts(url=url, -#' username=username, -#' password=password, -#' outbreak_id=outbreak_id) +#' contacts_of_contacts <- get_contacts_of_contacts( +#' url = url, +#' username = username, +#' password = password, +#' outbreak_id = outbreak_id +#' ) #' } #' @importFrom magrittr %>% #' @import dplyr @@ -57,23 +69,33 @@ #' @importFrom purrr pluck -get_contacts_of_contacts <- function(url=url, - username=username, - password=password, - outbreak_id=outbreak_id, - method=c("export","batch"), - batch_size=50000, - wait=2, - file.type=c("json","csv")) { +get_contacts_of_contacts <- function(url = url, + username = username, + password = password, + outbreak_id = outbreak_id, + method = c("export", "batch"), + batch_size = 50000, + wait = 2, + file_type = c("json", "csv")) { #Check that outbreak_id is active - if (outbreak_id != get_active_outbreak(url=url, username=username, password=password)) { - set_active_outbreak(url=url, username=username, password=password, outbreak_id=outbreak_id) + active_outbreak <- get_active_outbreak( + url = url, + username = username, + password = password + ) + if (outbreak_id != active_outbreak) { + set_active_outbreak( + url = url, + username = username, + password = password, + outbreak_id = outbreak_id + ) } #Set default method based on current version of Go.Data - version.check <- check_godata_version(url=url) - if (!version.check) { + version_check <- check_godata_version(url = url) + if (!version_check) { method <- "batches" #Older version of Go.Data can only use the batch method } else if (missing(method)) { method <- "export" # For new versions of Go.Data, default to export method @@ -82,30 +104,47 @@ get_contacts_of_contacts <- function(url=url, if (method == "batches") { - api_call_n <- paste0(url, "api/outbreaks/",outbreak_id,"/contacts-of-contacts/filtered-count") - api_call_get <- paste0(url, "api/outbreaks/",outbreak_id,"/contacts-of-contacts") - df <- batch_downloader(url=url, - username=username, - password=password, - api_call_n=api_call_n, - api_call_get=api_call_get, - batch_size=batch_size) + api_call_n <- paste0( + url, + "api/outbreaks/", + outbreak_id, + "/contacts-of-contacts/filtered-count" + ) + api_call_get <- paste0( + url, + "api/outbreaks/", + outbreak_id, + "/contacts-of-contacts" + ) + df <- batch_downloader( + url = url, + username = username, + password = password, + api_call_n = api_call_n, + api_call_get = api_call_get, + batch_size = batch_size + ) } else if (method == "export") { - #Default value of file.type is "json" - if (missing(file.type)) file.type <- "json" + #Default value of file_type is "json" + if (missing(file_type)) file_type <- "json" #Submit an export request to the system - api_call_request <- paste0(url,"api/outbreaks/",outbreak_id,"/contacts-of-contacts/export") - df <- export_downloader(url=url, - username=username, - password=password, - api_call_request=api_call_request, - file.type = file.type, - wait = wait) - - + api_call_request <- paste0( + url, + "api/outbreaks/", + outbreak_id, + "/contacts-of-contacts/export" + ) + df <- export_downloader( + url = url, + username = username, + password = password, + api_call_request = api_call_request, + file_type = file_type, + wait = wait + ) } return(df) diff --git a/man/get_contacts_of_contacts.Rd b/man/get_contacts_of_contacts.Rd index 66bc41f..0c5de92 100644 --- a/man/get_contacts_of_contacts.Rd +++ b/man/get_contacts_of_contacts.Rd @@ -12,28 +12,38 @@ get_contacts_of_contacts( method = c("export", "batch"), batch_size = 50000, wait = 2, - file.type = c("json", "csv") + file_type = c("json", "csv") ) } \arguments{ -\item{url}{Insert the base URL for your instance of Go.Data here. Don't forget the forward slash "/" at end!} +\item{url}{Insert the base URL for your instance of Go.Data here. Don't +forget the forward slash "/" at end!} \item{username}{The email address for your Go.Data login.} \item{password}{The password for your Go.Data login} -\item{outbreak_id}{The id number for the outbreak for which you want to download data.} +\item{outbreak_id}{The id number for the outbreak for which you want to +download data.} -\item{method}{The method to download data. \code{method="export"} is the preferred and default method for Go.Data version 2.38.1 or later. See Details.} +\item{method}{The method to download data. \code{method = "export"} is the +preferred and default method for Go.Data version 2.38.1 or later. See +Details.} -\item{batch_size}{If \code{method="batches"}, then \code{batch_size} specifies the number of records to retrieve in each iteration.} +\item{batch_size}{If \code{method = "batches"}, then \code{batch_size} specifies the +number of records to retrieve in each iteration.} -\item{wait}{If \code{method="export"}, then \code{wait} is the number of seconds to wait in between iterations of checking the status of the export.} +\item{wait}{If \code{method = "export"}, then \code{wait} is the number of seconds to +wait in between iterations of checking the status of the export.} -\item{file.type}{If \code{method="export"}, then \code{file.type} determines Whether the resulting data frame should contain nested fields (\code{file.type="json"}, the default) or an entirely flat data structure (\code{file.type="csv"})} +\item{file_type}{If \code{method = "export"}, then \code{file_type} determines Whether +the resulting data frame should contain nested fields (\code{file_type = "json"}, +the default) or an entirely flat data structure (\code{file_type = "csv"})} } \value{ -Returns a data frame. Some fields, such as addresses, hospitalization history, and questionnaire fields may require further unnesting. See \code{\link[tidyr]{nest}} for assitance with unnesting. +Returns a data frame. Some fields, such as addresses, hospitalization +history, and questionnaire fields may require further unnesting. See +\code{\link[tidyr]{nest}} for assitance with unnesting. } \description{ A function to retrieve the contact-of-contact @@ -43,20 +53,20 @@ data for a specific \code{outbreak_id}. This function works on all versions of Go.Data. There are two methods for downloading the data: -\code{method="batches"} will work on all versions of +\code{method = "batches"} will work on all versions of Go.Data. This method relies on the GET outbreak/{id}/contacts-of-contacts API endpoint. Records are then retrieved in batches based on \code{batch_size} and appended together into a final -dataset. \code{method="batches"} will be the default and +dataset. \code{method = "batches"} will be the default and only available method for Go.Data version 2.38.0 or older. -\code{method="export"} will only work on Go.Data versions +\code{method = "export"} will only work on Go.Data versions 2.38.1 or newer. This method relies on the GET outbreak/{id}/contacts-of-contacts/export API endpoint. An export request is submitted to the server, and then when the export is ready, it will be downloaded. Due to -better performance and more options, \code{method="export"} will +better performance and more options, \code{method = "export"} will be the default if you are using Go.Data version 2.38.1 or newer. } @@ -67,9 +77,11 @@ username <- "myemail@email.com" password <- "mypassword" outbreak_id <- "3b5554d7-2c19-41d0-b9af-475ad25a382b" -contacts_of_contacts <- get_contacts_of_contacts(url=url, - username=username, - password=password, - outbreak_id=outbreak_id) +contacts_of_contacts <- get_contacts_of_contacts( + url = url, + username = username, + password = password, + outbreak_id = outbreak_id +) } } From e9fddf36523080374e81c5df1d1684fe78b5815f Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Thu, 2 Feb 2023 14:44:28 +0000 Subject: [PATCH 089/203] removed recursive default arguments from get_contacts_of_contacts --- R/get_contacts_of_contacts.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/get_contacts_of_contacts.R b/R/get_contacts_of_contacts.R index c97a513..8b25612 100644 --- a/R/get_contacts_of_contacts.R +++ b/R/get_contacts_of_contacts.R @@ -69,10 +69,10 @@ #' @importFrom purrr pluck -get_contacts_of_contacts <- function(url = url, - username = username, - password = password, - outbreak_id = outbreak_id, +get_contacts_of_contacts <- function(url, + username, + password, + outbreak_id, method = c("export", "batch"), batch_size = 50000, wait = 2, From b298b01e91fa565ac47531dd0f2abe87fa0cbb2a Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Thu, 2 Feb 2023 14:45:33 +0000 Subject: [PATCH 090/203] removed imports from get_contacts_of_contacts --- R/get_contacts_of_contacts.R | 9 --------- man/get_contacts_of_contacts.Rd | 8 ++++---- 2 files changed, 4 insertions(+), 13 deletions(-) diff --git a/R/get_contacts_of_contacts.R b/R/get_contacts_of_contacts.R index 8b25612..cd98bdf 100644 --- a/R/get_contacts_of_contacts.R +++ b/R/get_contacts_of_contacts.R @@ -60,15 +60,6 @@ #' outbreak_id = outbreak_id #' ) #' } -#' @importFrom magrittr %>% -#' @import dplyr -#' @import tidyr -#' @import httr -#' @import tibble -#' @importFrom jsonlite fromJSON -#' @importFrom purrr pluck - - get_contacts_of_contacts <- function(url, username, password, diff --git a/man/get_contacts_of_contacts.Rd b/man/get_contacts_of_contacts.Rd index 0c5de92..33d8205 100644 --- a/man/get_contacts_of_contacts.Rd +++ b/man/get_contacts_of_contacts.Rd @@ -5,10 +5,10 @@ \title{Download contacts-of-contacts from Go.Data} \usage{ get_contacts_of_contacts( - url = url, - username = username, - password = password, - outbreak_id = outbreak_id, + url, + username, + password, + outbreak_id, method = c("export", "batch"), batch_size = 50000, wait = 2, From 9f10f0c5cbcd4236719627c0f5842dccc4831717 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Thu, 2 Feb 2023 14:46:11 +0000 Subject: [PATCH 091/203] ensure get_contact_of_contacts is returned as a tibble --- R/get_contacts_of_contacts.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/R/get_contacts_of_contacts.R b/R/get_contacts_of_contacts.R index cd98bdf..a18baae 100644 --- a/R/get_contacts_of_contacts.R +++ b/R/get_contacts_of_contacts.R @@ -138,5 +138,7 @@ get_contacts_of_contacts <- function(url, ) } + df <- tibble::as_tibble(df) + return(df) } From d771daa3f6c13231cfee2eff5b4d3b325f93ce5f Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Thu, 2 Feb 2023 14:55:22 +0000 Subject: [PATCH 092/203] added test for get_contacts_of_contacts --- .../testthat/test-get_contacts_of_contacts.R | 37 +++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 tests/testthat/test-get_contacts_of_contacts.R diff --git a/tests/testthat/test-get_contacts_of_contacts.R b/tests/testthat/test-get_contacts_of_contacts.R new file mode 100644 index 0000000..170436f --- /dev/null +++ b/tests/testthat/test-get_contacts_of_contacts.R @@ -0,0 +1,37 @@ +test_that("get_contacts_of_contacts works as expected", { + skip("get_contacts_of_contacts requires API call") + + res <- get_contacts_of_contacts( + url = url, + username = username, + password = password, + outbreak_id = outbreak_id + ) + + expect_s3_class(res, "tbl_df") + expect_s3_class(res, "data.frame") + expect_identical(dim(res), c(1L, 39L)) + expect_identical( + colnames(res), + c("id", "visualId", "dateOfReporting", "isDateOfReportingApproximate", + "createdAt", "createdBy", "updatedAt", "updatedBy", "deleted", + "deletedAt", "createdOn", "firstName", "middleName", "lastName", + "gender", "occupation", "dob", "classification", "wasContact", "wasCase", + "dateBecomeContact", "riskLevel", "riskReason", "outcomeId", + "dateOfOutcome", "documents", "type", "transferRefused", "addresses", + "safeBurial", "dateOfBurial", "dateOfLastContact", "numberOfExposures", + "vaccinesReceived", "pregnancyStatus", "responsibleUserId", + "relationship", "age.years", "age.months") + ) + + expect_identical( + unname(sapply(res[1, ], class)), + c("character", "character", "character", "logical", "character", + "character", "character", "character", "logical", "logical", "character", + "character", "logical", "character", "character", "logical", "logical", + "logical", "logical", "logical", "logical", "logical", "logical", + "logical", "logical", "list", "character", "logical", "list", "logical", + "logical", "character", "integer", "list", "character", "logical", + "logical", "integer", "logical") + ) +}) From e23a8897b18e930ee97b0b40020fe391003ea8e1 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Fri, 3 Feb 2023 10:04:10 +0000 Subject: [PATCH 093/203] linted get_languages --- R/get_languages.R | 36 ++++++++++++++++++++++++------------ man/get_languages.Rd | 11 +++++++---- 2 files changed, 31 insertions(+), 16 deletions(-) diff --git a/R/get_languages.R b/R/get_languages.R index 6777c3e..96969ec 100644 --- a/R/get_languages.R +++ b/R/get_languages.R @@ -1,7 +1,8 @@ #' Get lanuages in Go.Data #' #' -#' @param url Insert the base URL for your instance of Go.Data here. Don't forget the forward slash "/" at end! +#' @param url Insert the base URL for your instance of Go.Data here. Don't +#' forget the forward slash "/" at end! #' @param username The email address for your Go.Data login. #' @param password The password for your Go.Data login #' @@ -15,9 +16,11 @@ #' password <- "mypassword" #' language <- "english_us" #' -#' languages <- get_languages(url=url, -#' username=username, -#' password=password) +#' languages <- get_languages( +#' url = url, +#' username = username, +#' password = password +#' ) #' } #' @importFrom magrittr %>% #' @import dplyr @@ -27,16 +30,25 @@ #' @importFrom purrr pluck -get_languages <- function(url=url, - username=username, - password=password) { +get_languages <- function(url = url, + username = username, + password = password) { - df <- GET(paste0(url,"api/languages/", - "?access_token=",get_access_token(url=url, username=username, password=password))) %>% - content(as="text") %>% - fromJSON(flatten=TRUE) %>% + df <- GET( + paste0( + url, + "api/languages/", + "?access_token=", + get_access_token( + url = url, + username = username, + password = password + ) + ) + ) %>% + content(as = "text") %>% + fromJSON(flatten = TRUE) %>% as_tibble() return(df) - } diff --git a/man/get_languages.Rd b/man/get_languages.Rd index 2790301..6ad8a3a 100644 --- a/man/get_languages.Rd +++ b/man/get_languages.Rd @@ -7,7 +7,8 @@ get_languages(url = url, username = username, password = password) } \arguments{ -\item{url}{Insert the base URL for your instance of Go.Data here. Don't forget the forward slash "/" at end!} +\item{url}{Insert the base URL for your instance of Go.Data here. Don't +forget the forward slash "/" at end!} \item{username}{The email address for your Go.Data login.} @@ -26,8 +27,10 @@ username <- "myemail@email.com" password <- "mypassword" language <- "english_us" -languages <- get_languages(url=url, - username=username, - password=password) +languages <- get_languages( + url = url, + username = username, + password = password +) } } From 4e44181c746978f722b6e442333c69eaf957db4b Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Fri, 3 Feb 2023 10:05:08 +0000 Subject: [PATCH 094/203] removed pipe from get_languages --- R/get_languages.R | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/R/get_languages.R b/R/get_languages.R index 96969ec..8e46b9c 100644 --- a/R/get_languages.R +++ b/R/get_languages.R @@ -34,7 +34,7 @@ get_languages <- function(url = url, username = username, password = password) { - df <- GET( + df_request <- GET( paste0( url, "api/languages/", @@ -45,10 +45,13 @@ get_languages <- function(url = url, password = password ) ) - ) %>% - content(as = "text") %>% - fromJSON(flatten = TRUE) %>% - as_tibble() + ) + + df_content <- content(df_request, as = "text") + + df <- fromJSON(df_content, flatten = TRUE) + + df <- as_tibble(df) return(df) } From 2ec3a5f2a46f609d3894d0251bc87843d5e88bf9 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Fri, 3 Feb 2023 10:05:35 +0000 Subject: [PATCH 095/203] removed recursive default arugments from get_languages --- R/get_languages.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/get_languages.R b/R/get_languages.R index 8e46b9c..0d084db 100644 --- a/R/get_languages.R +++ b/R/get_languages.R @@ -30,9 +30,9 @@ #' @importFrom purrr pluck -get_languages <- function(url = url, - username = username, - password = password) { +get_languages <- function(url, + username, + password) { df_request <- GET( paste0( From 668c3a8d678a68b007a003cb1b4e661f02ebfe5c Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Fri, 3 Feb 2023 10:07:29 +0000 Subject: [PATCH 096/203] used explicit namespace instead of import for get_languages --- R/get_languages.R | 16 ++++------------ man/get_languages.Rd | 2 +- 2 files changed, 5 insertions(+), 13 deletions(-) diff --git a/R/get_languages.R b/R/get_languages.R index 0d084db..c227830 100644 --- a/R/get_languages.R +++ b/R/get_languages.R @@ -22,19 +22,11 @@ #' password = password #' ) #' } -#' @importFrom magrittr %>% -#' @import dplyr -#' @import tidyr -#' @import httr -#' @importFrom jsonlite fromJSON -#' @importFrom purrr pluck - - get_languages <- function(url, username, password) { - df_request <- GET( + df_request <- httr::GET( paste0( url, "api/languages/", @@ -47,11 +39,11 @@ get_languages <- function(url, ) ) - df_content <- content(df_request, as = "text") + df_content <- httr::content(df_request, as = "text") - df <- fromJSON(df_content, flatten = TRUE) + df <- jsonlite::fromJSON(df_content, flatten = TRUE) - df <- as_tibble(df) + df <- tibble::as_tibble(df) return(df) } diff --git a/man/get_languages.Rd b/man/get_languages.Rd index 6ad8a3a..003e55d 100644 --- a/man/get_languages.Rd +++ b/man/get_languages.Rd @@ -4,7 +4,7 @@ \alias{get_languages} \title{Get lanuages in Go.Data} \usage{ -get_languages(url = url, username = username, password = password) +get_languages(url, username, password) } \arguments{ \item{url}{Insert the base URL for your instance of Go.Data here. Don't From 31389c7a20f23b2d1c103c25ee3e1e483b9a837a Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Fri, 3 Feb 2023 10:12:13 +0000 Subject: [PATCH 097/203] added test for get_languages --- tests/testthat/test-get_languages.R | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 tests/testthat/test-get_languages.R diff --git a/tests/testthat/test-get_languages.R b/tests/testthat/test-get_languages.R new file mode 100644 index 0000000..16cc933 --- /dev/null +++ b/tests/testthat/test-get_languages.R @@ -0,0 +1,23 @@ +test_that("get_languages works as expected", { + skip("get_languages requires API call") + + res <- get_languages( + url = url, + username = username, + password = password + ) + + expect_s3_class(res, "tbl_df") + expect_s3_class(res, "data.frame") + expect_identical(dim(res), c(6L, 9L)) + expect_identical( + colnames(res), + c("name", "readOnly", "id", "createdAt", "createdBy", "updatedAt", + "updatedBy", "createdOn", "deleted") + ) + expect_identical( + unname(sapply(res[1, ], class)), + c("character", "logical", "character", "character", "character", + "character", "character", "character", "logical") + ) +}) From 52e1d831036f3457c0acb11becf1fdb53194c7f6 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Fri, 3 Feb 2023 10:16:20 +0000 Subject: [PATCH 098/203] linted get_language_tokens --- R/get_language_tokens.R | 50 ++++++++++++++++++++++++++------------ man/get_language_tokens.Rd | 25 ++++++++++++------- 2 files changed, 50 insertions(+), 25 deletions(-) diff --git a/R/get_language_tokens.R b/R/get_language_tokens.R index 28c365e..0fe61c1 100644 --- a/R/get_language_tokens.R +++ b/R/get_language_tokens.R @@ -1,13 +1,17 @@ -#' Get a list of variable tokens and their labels for the language you specify, in order to re-code variables in R. +#' Get a list of variable tokens and their labels for the language you specify, +#' in order to re-code variables in R. #' #' -#' @param url Insert the base URL for your instance of Go.Data here. Don't forget the forward slash "/" at end! +#' @param url Insert the base URL for your instance of Go.Data here. Don't +#' forget the forward slash "/" at end! #' @param username The email address for your Go.Data login. #' @param password The password for your Go.Data login -#' @param language The language ID you are retrieving translation file for, for instance "english_us" +#' @param language The language ID you are retrieving translation file for, +#' for instance "english_us" #' #' @return -#' Returns data frame of language tokens for your language. You will only be able to execute this function if you have access to the language tokens. +#' Returns data frame of language tokens for your language. You will only be +#' able to execute this function if you have access to the language tokens. #' @export #' @examples #' \dontrun{ @@ -16,10 +20,12 @@ #' password <- "mypassword" #' language <- "english_us" #' -#' language_tokens <- get_language_tokens(url=url, -#' username=username, -#' password=password, -#' language=language) +#' language_tokens <- get_language_tokens( +#' url = url, +#' username = username, +#' password = password, +#' language = language +#' ) #' } #' @importFrom magrittr %>% #' @import dplyr @@ -29,15 +35,27 @@ #' @importFrom purrr pluck -get_language_tokens <- function(url=url, - username=username, - password=password, - language=language) { +get_language_tokens <- function(url = url, + username = username, + password = password, + language = language) { - df <- GET(paste0(url,"api/languages/",language,"/language-tokens", - "?access_token=",get_access_token(url=url, username=username, password=password))) %>% - content(as="text") %>% - fromJSON(flatten=TRUE) %>% + df <- GET( + paste0( + url, + "api/languages/", + language, + "/language-tokens", + "?access_token=", + get_access_token( + url = url, + username = username, + password = password + ) + ) + ) %>% + content(as = "text") %>% + fromJSON(flatten = TRUE) %>% as_tibble() return(df) diff --git a/man/get_language_tokens.Rd b/man/get_language_tokens.Rd index f40f3f3..5c40d4c 100644 --- a/man/get_language_tokens.Rd +++ b/man/get_language_tokens.Rd @@ -2,7 +2,8 @@ % Please edit documentation in R/get_language_tokens.R \name{get_language_tokens} \alias{get_language_tokens} -\title{Get a list of variable tokens and their labels for the language you specify, in order to re-code variables in R.} +\title{Get a list of variable tokens and their labels for the language you specify, +in order to re-code variables in R.} \usage{ get_language_tokens( url = url, @@ -12,19 +13,23 @@ get_language_tokens( ) } \arguments{ -\item{url}{Insert the base URL for your instance of Go.Data here. Don't forget the forward slash "/" at end!} +\item{url}{Insert the base URL for your instance of Go.Data here. Don't +forget the forward slash "/" at end!} \item{username}{The email address for your Go.Data login.} \item{password}{The password for your Go.Data login} -\item{language}{The language ID you are retrieving translation file for, for instance "english_us"} +\item{language}{The language ID you are retrieving translation file for, +for instance "english_us"} } \value{ -Returns data frame of language tokens for your language. You will only be able to execute this function if you have access to the language tokens. +Returns data frame of language tokens for your language. You will only be +able to execute this function if you have access to the language tokens. } \description{ -Get a list of variable tokens and their labels for the language you specify, in order to re-code variables in R. +Get a list of variable tokens and their labels for the language you specify, +in order to re-code variables in R. } \examples{ \dontrun{ @@ -33,9 +38,11 @@ username <- "myemail@email.com" password <- "mypassword" language <- "english_us" -language_tokens <- get_language_tokens(url=url, - username=username, - password=password, - language=language) +language_tokens <- get_language_tokens( + url = url, + username = username, + password = password, + language = language +) } } From 72b47a0687a278e83447802c33f1d644b4d21724 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Fri, 3 Feb 2023 10:20:52 +0000 Subject: [PATCH 099/203] removed recursive argument defaults for get_language_tokens --- R/get_language_tokens.R | 8 ++++---- man/get_language_tokens.Rd | 7 +------ 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/R/get_language_tokens.R b/R/get_language_tokens.R index 0fe61c1..773a541 100644 --- a/R/get_language_tokens.R +++ b/R/get_language_tokens.R @@ -35,10 +35,10 @@ #' @importFrom purrr pluck -get_language_tokens <- function(url = url, - username = username, - password = password, - language = language) { +get_language_tokens <- function(url, + username, + password, + language) { df <- GET( paste0( diff --git a/man/get_language_tokens.Rd b/man/get_language_tokens.Rd index 5c40d4c..d6ab57c 100644 --- a/man/get_language_tokens.Rd +++ b/man/get_language_tokens.Rd @@ -5,12 +5,7 @@ \title{Get a list of variable tokens and their labels for the language you specify, in order to re-code variables in R.} \usage{ -get_language_tokens( - url = url, - username = username, - password = password, - language = language -) +get_language_tokens(url, username, password, language) } \arguments{ \item{url}{Insert the base URL for your instance of Go.Data here. Don't From 339ef2d532795c68c24668abf6f74b635df86297 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Fri, 3 Feb 2023 10:22:01 +0000 Subject: [PATCH 100/203] removed pipe from get_language_tokens --- R/get_language_tokens.R | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/R/get_language_tokens.R b/R/get_language_tokens.R index 773a541..742cea2 100644 --- a/R/get_language_tokens.R +++ b/R/get_language_tokens.R @@ -40,7 +40,7 @@ get_language_tokens <- function(url, password, language) { - df <- GET( + df_request <- GET( paste0( url, "api/languages/", @@ -53,10 +53,13 @@ get_language_tokens <- function(url, password = password ) ) - ) %>% - content(as = "text") %>% - fromJSON(flatten = TRUE) %>% - as_tibble() + ) + + df_content <- content(df_request, as = "text") + + df <- fromJSON(df_content, flatten = TRUE) + + df <- as_tibble(df) return(df) From 79a1801bb6e507e2c690dd3adb91e054040fd8de Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Fri, 3 Feb 2023 10:26:38 +0000 Subject: [PATCH 101/203] used explicit namespace instead of import for get_language_tokens --- R/get_language_tokens.R | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/R/get_language_tokens.R b/R/get_language_tokens.R index 742cea2..2f0a5af 100644 --- a/R/get_language_tokens.R +++ b/R/get_language_tokens.R @@ -27,20 +27,12 @@ #' language = language #' ) #' } -#' @importFrom magrittr %>% -#' @import dplyr -#' @import tidyr -#' @import httr -#' @importFrom jsonlite fromJSON -#' @importFrom purrr pluck - - get_language_tokens <- function(url, username, password, language) { - df_request <- GET( + df_request <- httr::GET( paste0( url, "api/languages/", @@ -55,12 +47,11 @@ get_language_tokens <- function(url, ) ) - df_content <- content(df_request, as = "text") + df_content <- httr::content(df_request, as = "text") - df <- fromJSON(df_content, flatten = TRUE) + df <- jsonlite::fromJSON(df_content, flatten = TRUE) - df <- as_tibble(df) + df <- tibble::as_tibble(df) return(df) - } From 73cf55919d57562be5ddd8d17d79b4a30cee811e Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Fri, 3 Feb 2023 10:34:45 +0000 Subject: [PATCH 102/203] added test for get_language_tokens --- tests/testthat/test-get_language_tokens.R | 24 +++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 tests/testthat/test-get_language_tokens.R diff --git a/tests/testthat/test-get_language_tokens.R b/tests/testthat/test-get_language_tokens.R new file mode 100644 index 0000000..23e975d --- /dev/null +++ b/tests/testthat/test-get_language_tokens.R @@ -0,0 +1,24 @@ +test_that("get_language_tokens works as expected", { + skip("get_language_tokens requires API call") + + res <- get_language_tokens( + url = url, + username = username, + password = password, + language = "english_us" + ) + + expect_s3_class(res, "tbl_df") + expect_s3_class(res, "data.frame") + expect_identical(dim(res), c(13641L, 3L)) + expect_identical( + colnames(res), + c("languageId", "lastUpdateDate", "tokens") + ) + expect_identical( + unname(sapply(res[1, ], class)), + c("character", "character", "data.frame") + ) + expect_identical(colnames(res$tokens),c("token", "translation")) + expect_identical(dim(res$tokens), c(13641L, 2L)) +}) From af77bd75b37ec74d72a76eaa4f470485d737aa00 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Sun, 5 Feb 2023 20:07:32 +0000 Subject: [PATCH 103/203] added translate_categories function and helper functions --- NAMESPACE | 1 + R/translate_categories.R | 95 +++++++++++++++++++++++++++++++++++++ man/any_tokens.Rd | 21 ++++++++ man/translate_categories.Rd | 44 +++++++++++++++++ man/translate_token.Rd | 22 +++++++++ 5 files changed, 183 insertions(+) create mode 100644 R/translate_categories.R create mode 100644 man/any_tokens.Rd create mode 100644 man/translate_categories.Rd create mode 100644 man/translate_token.Rd diff --git a/NAMESPACE b/NAMESPACE index efb977e..4ce71b1 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -29,6 +29,7 @@ export(get_users) export(mongify_date) export(null2na) export(set_active_outbreak) +export(translate_categories) import(data.table) import(dplyr) import(httr) diff --git a/R/translate_categories.R b/R/translate_categories.R new file mode 100644 index 0000000..a10a81d --- /dev/null +++ b/R/translate_categories.R @@ -0,0 +1,95 @@ +#' Translates cateogories with API labels to more readable forms using the +#' translation specified in the output of `get_language_tokens()`. +#' +#' @param data A data frame (or data frame extension) +#' @param language_tokens A data frame (or data frame extension) containing +#' the translations. Output from `get_language_tokens()` +#' +#' @return A tibble +#' @export +#' +#' @examples +#' \dontrun{ +#' cases <- get_cases( +#' url = url, +#' username = username, +#' password = password, +#' outbreak_id = outbreak_id +#' ) +#' +#' language_tokens <- get_language_tokens( +#' url = url, +#' username = username, +#' password = password, +#' language = "english_us" +#' ) +#' +#' translate_categories( +#' data = cases, +#' language_tokens = language_tokens +#' ) +#' } +translate_categories <- function(data, language_tokens) { + + stopifnot( + "data must be tabular data" = + is.data.frame(data), + "language_tokens must be tabular data" = + is.data.frame(language_tokens) + ) + + if (isFALSE(any_tokens(data, language_tokens))) { + return(data) + } + + data <- translate_token(data = data, language_tokens = language_tokens) + + data <- tibble::as_tibble(data) + + return(data) +} + +#' Translates tokens given a specified translation. +#' +#' @description This function does all the translation for +#' `translate_categories`. +#' +#' @inheritParams translate_categories +#' +#' @return A tibble +#' @keywords internal +translate_token <- function(data, language_tokens) { + if (is.list(data)) { + for (i in seq_along(data)) { + if (any_tokens(data[[i]], language_tokens)) { + data[[i]] <- translate_token( + data = data[[i]], + language_tokens = language_tokens + ) + } + } + } else { + token_index <- lapply( + data, + function(x) which(language_tokens$tokens$token %in% x) + ) + token_index <- unlist( + lapply( + token_index, + function(x) if (length(x) == 0) NA else x + ) + ) + data <- language_tokens$tokens$translation[token_index] + } + return(data) +} + +#' Checks if there are any recognized tokens in the data provided +#' +#' @inheritParams translate_categories +#' +#' @return Boolean logical (TRUE or FALSE) +#' @keywords internal +any_tokens <- function(data, language_tokens) { + any(unname(unlist(data)) %in% language_tokens$tokens$token) +} diff --git a/man/any_tokens.Rd b/man/any_tokens.Rd new file mode 100644 index 0000000..31a91ef --- /dev/null +++ b/man/any_tokens.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/translate_categories.R +\name{any_tokens} +\alias{any_tokens} +\title{Checks if there are any recognized tokens in the data provided} +\usage{ +any_tokens(data, language_tokens) +} +\arguments{ +\item{data}{A data frame (or data frame extension)} + +\item{language_tokens}{A data frame (or data frame extension) containing +the translations. Output from \code{get_language_tokens()}} +} +\value{ +Boolean logical (TRUE or FALSE) +} +\description{ +Checks if there are any recognized tokens in the data provided +} +\keyword{internal} diff --git a/man/translate_categories.Rd b/man/translate_categories.Rd new file mode 100644 index 0000000..d6fdc1b --- /dev/null +++ b/man/translate_categories.Rd @@ -0,0 +1,44 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/translate_categories.R +\name{translate_categories} +\alias{translate_categories} +\title{Translates cateogories with API labels to more readable forms using the +translation specified in the output of \code{get_language_tokens()}.} +\usage{ +translate_categories(data, language_tokens) +} +\arguments{ +\item{data}{A data frame (or data frame extension)} + +\item{language_tokens}{A data frame (or data frame extension) containing +the translations. Output from \code{get_language_tokens()}} +} +\value{ +A tibble +} +\description{ +Translates cateogories with API labels to more readable forms using the +translation specified in the output of \code{get_language_tokens()}. +} +\examples{ +\dontrun{ +cases <- get_cases( + url = url, + username = username, + password = password, + outbreak_id = outbreak_id +) + +language_tokens <- get_language_tokens( + url = url, + username = username, + password = password, + language = "english_us" +) + +translate_categories( + data = cases, + language_tokens = language_tokens +) +} +} diff --git a/man/translate_token.Rd b/man/translate_token.Rd new file mode 100644 index 0000000..4be70c6 --- /dev/null +++ b/man/translate_token.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/translate_categories.R +\name{translate_token} +\alias{translate_token} +\title{Translates tokens given a specified translation.} +\usage{ +translate_token(data, language_tokens) +} +\arguments{ +\item{data}{A data frame (or data frame extension)} + +\item{language_tokens}{A data frame (or data frame extension) containing +the translations. Output from \code{get_language_tokens()}} +} +\value{ +A tibble +} +\description{ +This function does all the translation for +\code{translate_categories}. +} +\keyword{internal} From 998c2c76cac58842626bf5b3718bdb55985994ff Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Tue, 14 Feb 2023 09:06:27 +0000 Subject: [PATCH 104/203] linted get_labresults --- R/get_labresults.R | 125 +++++++++++++++++++++++++++++---------------- 1 file changed, 81 insertions(+), 44 deletions(-) diff --git a/R/get_labresults.R b/R/get_labresults.R index fa5af62..d9e5ac9 100644 --- a/R/get_labresults.R +++ b/R/get_labresults.R @@ -22,17 +22,27 @@ #' be the default if you are using Go.Data version 2.38.1 #' or newer. #' -#' @param url Insert the base URL for your instance of Go.Data here. Don't forget the forward slash "/" at end! +#' @param url Insert the base URL for your instance of Go.Data here. Don't +#' forget the forward slash "/" at end! #' @param username The email address for your Go.Data login. #' @param password The password for your Go.Data login -#' @param outbreak_id The id number for the outbreak for which you want to download data. -#' @param method The method to download data. `method="export"` is the preferred and default method for Go.Data version 2.38.1 or later. See Details. -#' @param batch_size If `method="batches"`, then `batch_size` specifies the number of records to retrieve in each iteration. -#' @param wait If `method="export"`, then `wait` is the number of seconds to wait in between iterations of checking the status of the export. -#' @param file.type If `method="export"`, then `file.type` determines Whether the resulting data frame should contain nested fields (`file.type="json"`, the default) or an entirely flat data structure (`file.type="csv"`) +#' @param outbreak_id The id number for the outbreak for which you want to +#' download data. +#' @param method The method to download data. `method="export"` is the +#' preferred and default method for Go.Data version 2.38.1 or later. See +#' Details. +#' @param batch_size If `method = "batches"`, then `batch_size` specifies the +#' number of records to retrieve in each iteration. +#' @param wait If `method = "export"`, then `wait` is the number of seconds to +#' wait in between iterations of checking the status of the export. +#' @param file_type If `method = "export"`, then `file_type` determines Whether +#' the resulting data frame should contain nested fields (`file_type = "json"`, +#' the default) or an entirely flat data structure (`file_type = "csv"`) #' #' @return -#' Returns a data frame. Some fields, such as addresses, hospitalization history, and questionnaire fields may require further unnesting. See \code{\link[tidyr]{nest}} for assitance with unnesting. +#' Returns a data frame. Some fields, such as addresses, hospitalization +#' history, and questionnaire fields may require further unnesting. See +#' \code{\link[tidyr]{nest}} for assitance with unnesting. #' @export #' #' @examples @@ -42,10 +52,12 @@ #' password <- "mypassword" #' outbreak_id <- "3b5554d7-2c19-41d0-b9af-475ad25a382b" #' -#' labresults <- get_labresults(url=url, -#' username=username, -#' password=password, -#' outbreak_id=outbreak_id) +#' labresults <- get_labresults( +#' url = url, +#' username = username, +#' password = password, +#' outbreak_id = outbreak_id +#' ) #' } #' @importFrom magrittr %>% #' @import dplyr @@ -54,25 +66,33 @@ #' @import tibble #' @importFrom jsonlite fromJSON #' @importFrom purrr pluck - - -get_labresults <- function(url=url, - username=username, - password=password, - outbreak_id=outbreak_id, - method=c("export","batch"), - batch_size=50000, - wait=2, - file.type=c("json","csv")) { +get_labresults <- function(url = url, + username = username, + password = password, + outbreak_id = outbreak_id, + method = c("export", "batch"), + batch_size = 50000, + wait = 2, + file_type = c("json", "csv")) { #Check that outbreak_id is active - if (outbreak_id != get_active_outbreak(url=url, username=username, password=password)) { - set_active_outbreak(url=url, username=username, password=password, outbreak_id=outbreak_id) + active_outbreak <- get_active_outbreak( + url = url, + username = username, + password = password + ) + if (outbreak_id != active_outbreak) { + set_active_outbreak( + url = url, + username = username, + password = password, + outbreak_id = outbreak_id + ) } #Set default method based on current version of Go.Data - version.check <- check_godata_version(url=url) - if (!version.check) { + version_check <- check_godata_version(url = url) + if (!version_check) { method <- "batches" #Older version of Go.Data can only use the batch method } else if (missing(method)) { method <- "export" # For new versions of Go.Data, default to export method @@ -81,30 +101,47 @@ get_labresults <- function(url=url, if (method == "batches") { - api_call_n <- paste0(url, "api/outbreaks/",outbreak_id,"/lab-results/aggregate-filtered-count") - api_call_get <- paste0(url, "api/outbreaks/",outbreak_id,"/lab-results/aggregate") - df <- batch_downloader(url=url, - username=username, - password=password, - api_call_n=api_call_n, - api_call_get=api_call_get, - batch_size=batch_size) + api_call_n <- paste0( + url, + "api/outbreaks/", + outbreak_id, + "/lab-results/aggregate-filtered-count" + ) + api_call_get <- paste0( + url, + "api/outbreaks/", + outbreak_id, + "/lab-results/aggregate" + ) + df <- batch_downloader( + url = url, + username = username, + password = password, + api_call_n = api_call_n, + api_call_get = api_call_get, + batch_size = batch_size + ) } else if (method == "export") { - #Default value of file.type is "json" - if (missing(file.type)) file.type <- "json" + #Default value of file_type is "json" + if (missing(file_type)) file_type <- "json" #Submit an export request to the system - api_call_request <- paste0(url,"api/outbreaks/",outbreak_id,"/lab-results/export") - df <- export_downloader(url=url, - username=username, - password=password, - api_call_request=api_call_request, - file.type = file.type, - wait = wait) - - + api_call_request <- paste0( + url, + "api/outbreaks/", + outbreak_id, + "/lab-results/export" + ) + df <- export_downloader( + url = url, + username = username, + password = password, + api_call_request = api_call_request, + file_type = file_type, + wait = wait + ) } return(df) From 69a0167b6cbb0913381b2b4279c33bbec9c74537 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Tue, 14 Feb 2023 09:07:06 +0000 Subject: [PATCH 105/203] removed recursive arugment defaults for get_labresults --- R/get_labresults.R | 8 ++++---- man/get_labresults.Rd | 44 +++++++++++++++++++++++++++---------------- 2 files changed, 32 insertions(+), 20 deletions(-) diff --git a/R/get_labresults.R b/R/get_labresults.R index d9e5ac9..24c98f6 100644 --- a/R/get_labresults.R +++ b/R/get_labresults.R @@ -66,10 +66,10 @@ #' @import tibble #' @importFrom jsonlite fromJSON #' @importFrom purrr pluck -get_labresults <- function(url = url, - username = username, - password = password, - outbreak_id = outbreak_id, +get_labresults <- function(url, + username, + password, + outbreak_id, method = c("export", "batch"), batch_size = 50000, wait = 2, diff --git a/man/get_labresults.Rd b/man/get_labresults.Rd index 4dbac8d..3fc40cc 100644 --- a/man/get_labresults.Rd +++ b/man/get_labresults.Rd @@ -5,35 +5,45 @@ \title{Download lab results from Go.Data} \usage{ get_labresults( - url = url, - username = username, - password = password, - outbreak_id = outbreak_id, + url, + username, + password, + outbreak_id, method = c("export", "batch"), batch_size = 50000, wait = 2, - file.type = c("json", "csv") + file_type = c("json", "csv") ) } \arguments{ -\item{url}{Insert the base URL for your instance of Go.Data here. Don't forget the forward slash "/" at end!} +\item{url}{Insert the base URL for your instance of Go.Data here. Don't +forget the forward slash "/" at end!} \item{username}{The email address for your Go.Data login.} \item{password}{The password for your Go.Data login} -\item{outbreak_id}{The id number for the outbreak for which you want to download data.} +\item{outbreak_id}{The id number for the outbreak for which you want to +download data.} -\item{method}{The method to download data. \code{method="export"} is the preferred and default method for Go.Data version 2.38.1 or later. See Details.} +\item{method}{The method to download data. \code{method="export"} is the +preferred and default method for Go.Data version 2.38.1 or later. See +Details.} -\item{batch_size}{If \code{method="batches"}, then \code{batch_size} specifies the number of records to retrieve in each iteration.} +\item{batch_size}{If \code{method = "batches"}, then \code{batch_size} specifies the +number of records to retrieve in each iteration.} -\item{wait}{If \code{method="export"}, then \code{wait} is the number of seconds to wait in between iterations of checking the status of the export.} +\item{wait}{If \code{method = "export"}, then \code{wait} is the number of seconds to +wait in between iterations of checking the status of the export.} -\item{file.type}{If \code{method="export"}, then \code{file.type} determines Whether the resulting data frame should contain nested fields (\code{file.type="json"}, the default) or an entirely flat data structure (\code{file.type="csv"})} +\item{file_type}{If \code{method = "export"}, then \code{file_type} determines Whether +the resulting data frame should contain nested fields (\code{file_type = "json"}, +the default) or an entirely flat data structure (\code{file_type = "csv"})} } \value{ -Returns a data frame. Some fields, such as addresses, hospitalization history, and questionnaire fields may require further unnesting. See \code{\link[tidyr]{nest}} for assitance with unnesting. +Returns a data frame. Some fields, such as addresses, hospitalization +history, and questionnaire fields may require further unnesting. See +\code{\link[tidyr]{nest}} for assitance with unnesting. } \description{ A function to retrieve the lab result data for a @@ -66,9 +76,11 @@ username <- "myemail@email.com" password <- "mypassword" outbreak_id <- "3b5554d7-2c19-41d0-b9af-475ad25a382b" -labresults <- get_labresults(url=url, - username=username, - password=password, - outbreak_id=outbreak_id) +labresults <- get_labresults( + url = url, + username = username, + password = password, + outbreak_id = outbreak_id +) } } From 13a9f09bd77325bd575a9b8f9dbb8a77f7679fe1 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Tue, 14 Feb 2023 09:09:24 +0000 Subject: [PATCH 106/203] used explicit namespace instead of import in get_labresults --- R/get_labresults.R | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/R/get_labresults.R b/R/get_labresults.R index 24c98f6..9ed02fa 100644 --- a/R/get_labresults.R +++ b/R/get_labresults.R @@ -59,13 +59,6 @@ #' outbreak_id = outbreak_id #' ) #' } -#' @importFrom magrittr %>% -#' @import dplyr -#' @import tidyr -#' @import httr -#' @import tibble -#' @importFrom jsonlite fromJSON -#' @importFrom purrr pluck get_labresults <- function(url, username, password, @@ -143,6 +136,5 @@ get_labresults <- function(url, wait = wait ) } - - return(df) + return(tibble::as_tibble(df)) } From 56ab9a6cb6d2924033419f3bbf2dfb47b3fe2077 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Tue, 14 Feb 2023 09:23:28 +0000 Subject: [PATCH 107/203] added test for get_labresults --- tests/testthat/test-get_labresults.R | 58 ++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 tests/testthat/test-get_labresults.R diff --git a/tests/testthat/test-get_labresults.R b/tests/testthat/test-get_labresults.R new file mode 100644 index 0000000..3f0b7c5 --- /dev/null +++ b/tests/testthat/test-get_labresults.R @@ -0,0 +1,58 @@ +test_that("get_labresults works as expected", { + skip("get_labresults requires API call") + + res <- get_labresults( + url = url, + username = username, + password = password, + outbreak_id = outbreak_id, + method = "export", + batch_size = 50000, + wait = 2, + file_type = "json" + ) + + expect_s3_class(res, "tbl_df") + expect_s3_class(res, "data.frame") + expect_identical(dim(res), c(9L, 55L)) + expect_true( + all(c( + "id", "createdAt", "createdBy", "updatedAt", "updatedBy", "deleted", + "deletedAt", "createdOn", "personId", "dateSampleTaken", + "dateSampleDelivered", "dateTesting", "dateOfResult", "labName", + "sampleIdentifier", "sampleType", "testType", "testedFor", "result", + "quantitativeResult", "notes", "status", "sequence.hasSequence", + "sequence.dateSampleSent", "sequence.labId", "sequence.dateResult", + "sequence.resultId", "sequence.noSequenceReason", "person.visualId", + "person.type", "person.lastName", "person.firstName", + "person.dateOfOnset", "person.dateOfReporting", "person.middleName", + "person.address.typeId", "person.address.city", + "person.address.locationId", "person.address.Identifiers", + "person.address.Location geographical level", + "person.address.Parent location", "person.address.geoLocationAccurate", + "person.address.date", "person.address.country", + "person.address.addressLine1", "person.address.postalCode", + "person.address.phoneNumber", "person.address.emailAddress", + "person.address.geoLocation.lat", "person.address.geoLocation.lng", + "questionnaireAnswers.Lab_SpecimenCollection_Symptoms", + "questionnaireAnswers.Lab_SpecimenShippedAnotherLaboratory", + "questionnaireAnswers.test", + "questionnaireAnswers.Lab_specimenshipped_laboratoryname", + "questionnaireAnswers.Lab_specimenshipped_dateofshipping" + ) %in% colnames(res)) + ) + + + expect_identical( + unname(sapply(res[1, ], class)), + c("character", "character", "character", "character", "character", + "logical", "logical", "character", "character", "character", "character", + "character", "character", "logical", "character", "character", + "character", "logical", "character", "logical", "logical", "character", + "logical", "character", "character", "character", "character", "logical", + "character", "character", "character", "character", "character", + "character", "character", "character", "character", "character", "list", + "list", "list", "logical", "character", "logical", "logical", "logical", + "logical", "logical", "numeric", "numeric", "list", "list", "list", + "list", "list")) +}) From aac21b5937a878342978b727759c53fed172a2f1 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Tue, 14 Feb 2023 09:34:48 +0000 Subject: [PATCH 108/203] linted get_relationships --- R/get_relationships.R | 131 +++++++++++++++++++++++++-------------- man/get_relationships.Rd | 42 ++++++++----- 2 files changed, 112 insertions(+), 61 deletions(-) diff --git a/R/get_relationships.R b/R/get_relationships.R index cef852a..6b22300 100644 --- a/R/get_relationships.R +++ b/R/get_relationships.R @@ -6,33 +6,43 @@ #' This function works on all versions of Go.Data. There #' are two methods for downloading the data: #' -#' `method="batches"` will work on all versions of +#' `method = "batches"` will work on all versions of #' Go.Data. This method relies on the GET outbreak/{id}/relationships #' API endpoint. Records are then retrieved in batches #' based on `batch_size` and appended together into -#' a final dataset. `method="batches"` will be the default and +#' a final dataset. `method = "batches"` will be the default and #' only available method for Go.Data version 2.38.0 or older. #' -#' `method="export"` will only work on Go.Data versions +#' `method = "export"` will only work on Go.Data versions #' 2.38.1 or newer. This method relies on the GET #' outbreak/{id}/relationships/export API endpoint. An export #' request is submitted to the server, and then when the #' export is ready, it will be downloaded. Due to better -#' performance and more options, `method="export"` will +#' performance and more options, `method = "export"` will #' be the default if you are using Go.Data version 2.38.1 #' or newer. #' -#' @param url Insert the base URL for your instance of Go.Data here. Don't forget the forward slash "/" at end! +#' @param url Insert the base URL for your instance of Go.Data here. Don't +#' forget the forward slash "/" at end! #' @param username The email address for your Go.Data login. #' @param password The password for your Go.Data login -#' @param outbreak_id The id number for the outbreak for which you want to download data. -#' @param method The method to download data. `method="export"` is the preferred and default method for Go.Data version 2.38.1 or later. See Details. -#' @param batch_size If `method="batches"`, then `batch_size` specifies the number of records to retrieve in each iteration. -#' @param wait If `method="export"`, then `wait` is the number of seconds to wait in between iterations of checking the status of the export. -#' @param file.type If `method="export"`, then `file.type` determines Whether the resulting data frame should contain nested fields (`file.type="json"`, the default) or an entirely flat data structure (`file.type="csv"`) +#' @param outbreak_id The id number for the outbreak for which you want to +#' download data. +#' @param method The method to download data. `method = "export"` is the +#' preferred and default method for Go.Data version 2.38.1 or later. See +#' Details. +#' @param batch_size If `method = "batches"`, then `batch_size` specifies the +#' number of records to retrieve in each iteration. +#' @param wait If `method = "export"`, then `wait` is the number of seconds to +#' wait in between iterations of checking the status of the export. +#' @param file_type If `method = "export"`, then `file_type` determines Whether +#' the resulting data frame should contain nested fields (`file_type = "json"`, +#' the default) or an entirely flat data structure (`file_type = "csv"`) #' #' @return -#' Returns a data frame. Some fields, such as addresses, hospitalization history, and questionnaire fields may require further unnesting. See \code{\link[tidyr]{nest}} for assitance with unnesting. +#' Returns a data frame. Some fields, such as addresses, hospitalization +#' history, and questionnaire fields may require further unnesting. See +#' \code{\link[tidyr]{nest}} for assitance with unnesting. #' @export #' #' @examples @@ -42,10 +52,12 @@ #' password <- "mypassword" #' outbreak_id <- "3b5554d7-2c19-41d0-b9af-475ad25a382b" #' -#' relationships <- get_relationships(url=url, -#' username=username, -#' password=password, -#' outbreak_id=outbreak_id) +#' relationships <- get_relationships( +#' url = url, +#' username = username, +#' password = password, +#' outbreak_id = outbreak_id +#' ) #' } #' @importFrom magrittr %>% #' @import dplyr @@ -54,25 +66,33 @@ #' @import tibble #' @importFrom jsonlite fromJSON #' @importFrom purrr pluck - - -get_relationships <- function(url=url, - username=username, - password=password, - outbreak_id=outbreak_id, - method=c("export","batch"), - batch_size=50000, - wait=2, - file.type=c("json","csv")) { +get_relationships <- function(url = url, + username = username, + password = password, + outbreak_id = outbreak_id, + method = c("export", "batch"), + batch_size = 50000, + wait = 2, + file_type = c("json", "csv")) { #Check that outbreak_id is active - if (outbreak_id != get_active_outbreak(url=url, username=username, password=password)) { - set_active_outbreak(url=url, username=username, password=password, outbreak_id=outbreak_id) + active_outbreak <- get_active_outbreak( + url = url, + username = username, + password = password + ) + if (outbreak_id != active_outbreak) { + set_active_outbreak( + url = url, + username = username, + password = password, + outbreak_id = outbreak_id + ) } #Set default method based on current version of Go.Data - version.check <- check_godata_version(url=url) - if (!version.check) { + version_check <- check_godata_version(url = url) + if (!version_check) { method <- "batches" #Older version of Go.Data can only use the batch method } else if (missing(method)) { method <- "export" # For new versions of Go.Data, default to export method @@ -81,28 +101,47 @@ get_relationships <- function(url=url, if (method == "batches") { - api_call_n <- paste0(url, "api/outbreaks/",outbreak_id,"/relationships/count") - api_call_get <- paste0(url, "api/outbreaks/",outbreak_id,"/relationships") - df <- batch_downloader(url=url, - username=username, - password=password, - api_call_n=api_call_n, - api_call_get=api_call_get, - batch_size=batch_size) + api_call_n <- paste0( + url, + "api/outbreaks/", + outbreak_id, + "/relationships/count" + ) + api_call_get <- paste0( + url, + "api/outbreaks/", + outbreak_id, + "/relationships" + ) + df <- batch_downloader( + url = url, + username = username, + password = password, + api_call_n = api_call_n, + api_call_get = api_call_get, + batch_size = batch_size + ) } else if (method == "export") { - #Default value of file.type is "json" - if (missing(file.type)) file.type <- "json" + #Default value of file_type is "json" + if (missing(file_type)) file_type <- "json" #Submit an export request to the system - api_call_request <- paste0(url,"api/outbreaks/",outbreak_id,"/relationships/export") - df <- export_downloader(url=url, - username=username, - password=password, - api_call_request=api_call_request, - file.type = file.type, - wait = wait) + api_call_request <- paste0( + url, + "api/outbreaks/", + outbreak_id, + "/relationships/export" + ) + df <- export_downloader( + url = url, + username = username, + password = password, + api_call_request = api_call_request, + file_type = file_type, + wait = wait + ) } diff --git a/man/get_relationships.Rd b/man/get_relationships.Rd index f485fe8..5d87b4b 100644 --- a/man/get_relationships.Rd +++ b/man/get_relationships.Rd @@ -16,24 +16,34 @@ get_relationships( ) } \arguments{ -\item{url}{Insert the base URL for your instance of Go.Data here. Don't forget the forward slash "/" at end!} +\item{url}{Insert the base URL for your instance of Go.Data here. Don't +forget the forward slash "/" at end!} \item{username}{The email address for your Go.Data login.} \item{password}{The password for your Go.Data login} -\item{outbreak_id}{The id number for the outbreak for which you want to download data.} +\item{outbreak_id}{The id number for the outbreak for which you want to +download data.} -\item{method}{The method to download data. \code{method="export"} is the preferred and default method for Go.Data version 2.38.1 or later. See Details.} +\item{method}{The method to download data. \code{method = "export"} is the +preferred and default method for Go.Data version 2.38.1 or later. See +Details.} -\item{batch_size}{If \code{method="batches"}, then \code{batch_size} specifies the number of records to retrieve in each iteration.} +\item{batch_size}{If \code{method = "batches"}, then \code{batch_size} specifies the +number of records to retrieve in each iteration.} -\item{wait}{If \code{method="export"}, then \code{wait} is the number of seconds to wait in between iterations of checking the status of the export.} +\item{wait}{If \code{method = "export"}, then \code{wait} is the number of seconds to +wait in between iterations of checking the status of the export.} -\item{file.type}{If \code{method="export"}, then \code{file.type} determines Whether the resulting data frame should contain nested fields (\code{file.type="json"}, the default) or an entirely flat data structure (\code{file.type="csv"})} +\item{file.type}{If \code{method = "export"}, then \code{file.type} determines Whether +the resulting data frame should contain nested fields (\code{file.type = "json"}, +the default) or an entirely flat data structure (\code{file.type = "csv"})} } \value{ -Returns a data frame. Some fields, such as addresses, hospitalization history, and questionnaire fields may require further unnesting. See \code{\link[tidyr]{nest}} for assitance with unnesting. +Returns a data frame. Some fields, such as addresses, hospitalization +history, and questionnaire fields may require further unnesting. See +\code{\link[tidyr]{nest}} for assitance with unnesting. } \description{ A function to retrieve the relationship data for a @@ -43,19 +53,19 @@ specific \code{outbreak_id}. This function works on all versions of Go.Data. There are two methods for downloading the data: -\code{method="batches"} will work on all versions of +\code{method = "batches"} will work on all versions of Go.Data. This method relies on the GET outbreak/{id}/relationships API endpoint. Records are then retrieved in batches based on \code{batch_size} and appended together into -a final dataset. \code{method="batches"} will be the default and +a final dataset. \code{method = "batches"} will be the default and only available method for Go.Data version 2.38.0 or older. -\code{method="export"} will only work on Go.Data versions +\code{method = "export"} will only work on Go.Data versions 2.38.1 or newer. This method relies on the GET outbreak/{id}/relationships/export API endpoint. An export request is submitted to the server, and then when the export is ready, it will be downloaded. Due to better -performance and more options, \code{method="export"} will +performance and more options, \code{method = "export"} will be the default if you are using Go.Data version 2.38.1 or newer. } @@ -66,9 +76,11 @@ username <- "myemail@email.com" password <- "mypassword" outbreak_id <- "3b5554d7-2c19-41d0-b9af-475ad25a382b" -relationships <- get_relationships(url=url, - username=username, - password=password, - outbreak_id=outbreak_id) +relationships <- get_relationships( + url = url, + username = username, + password = password, + outbreak_id = outbreak_id +) } } From eab74905ce9e29f4f6a5fb284dd8e380f0b97245 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Tue, 14 Feb 2023 09:35:51 +0000 Subject: [PATCH 109/203] removed recursive argument defaults from get_relationships --- R/get_relationships.R | 8 ++++---- man/get_relationships.Rd | 16 ++++++++-------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/R/get_relationships.R b/R/get_relationships.R index 6b22300..32c6752 100644 --- a/R/get_relationships.R +++ b/R/get_relationships.R @@ -66,10 +66,10 @@ #' @import tibble #' @importFrom jsonlite fromJSON #' @importFrom purrr pluck -get_relationships <- function(url = url, - username = username, - password = password, - outbreak_id = outbreak_id, +get_relationships <- function(url, + username, + password, + outbreak_id, method = c("export", "batch"), batch_size = 50000, wait = 2, diff --git a/man/get_relationships.Rd b/man/get_relationships.Rd index 5d87b4b..f6f3a6b 100644 --- a/man/get_relationships.Rd +++ b/man/get_relationships.Rd @@ -5,14 +5,14 @@ \title{Download relationships from Go.Data} \usage{ get_relationships( - url = url, - username = username, - password = password, - outbreak_id = outbreak_id, + url, + username, + password, + outbreak_id, method = c("export", "batch"), batch_size = 50000, wait = 2, - file.type = c("json", "csv") + file_type = c("json", "csv") ) } \arguments{ @@ -36,9 +36,9 @@ number of records to retrieve in each iteration.} \item{wait}{If \code{method = "export"}, then \code{wait} is the number of seconds to wait in between iterations of checking the status of the export.} -\item{file.type}{If \code{method = "export"}, then \code{file.type} determines Whether -the resulting data frame should contain nested fields (\code{file.type = "json"}, -the default) or an entirely flat data structure (\code{file.type = "csv"})} +\item{file_type}{If \code{method = "export"}, then \code{file_type} determines Whether +the resulting data frame should contain nested fields (\code{file_type = "json"}, +the default) or an entirely flat data structure (\code{file_type = "csv"})} } \value{ Returns a data frame. Some fields, such as addresses, hospitalization From a31aa7573f58924a0c6e3986299d877a125c2d6d Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Tue, 14 Feb 2023 09:37:22 +0000 Subject: [PATCH 110/203] used explicit namespace instead of import in get_relationships --- R/get_relationships.R | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/R/get_relationships.R b/R/get_relationships.R index 32c6752..5c859fc 100644 --- a/R/get_relationships.R +++ b/R/get_relationships.R @@ -59,13 +59,6 @@ #' outbreak_id = outbreak_id #' ) #' } -#' @importFrom magrittr %>% -#' @import dplyr -#' @import tidyr -#' @import httr -#' @import tibble -#' @importFrom jsonlite fromJSON -#' @importFrom purrr pluck get_relationships <- function(url, username, password, @@ -142,9 +135,6 @@ get_relationships <- function(url, file_type = file_type, wait = wait ) - - } - - return(df) + return(tibble::as_tibble(df)) } From ab55e6a50d6a1c6e100ce9e124e50687c7a26dc9 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Tue, 14 Feb 2023 09:46:07 +0000 Subject: [PATCH 111/203] added test for get_relationships --- tests/testthat/test-get_relationships.R | 47 +++++++++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 tests/testthat/test-get_relationships.R diff --git a/tests/testthat/test-get_relationships.R b/tests/testthat/test-get_relationships.R new file mode 100644 index 0000000..3b534e3 --- /dev/null +++ b/tests/testthat/test-get_relationships.R @@ -0,0 +1,47 @@ +test_that("get_relationships works as expected", { + skip("get_relationships requires API call") + + res <- get_relationships( + url = url, + username = username, + password = password, + outbreak_id = outbreak_id, + method = "export", + batch_size = 50000, + wait = 2, + file_type = "json" + ) + + expect_s3_class(res, "tbl_df") + expect_s3_class(res, "data.frame") + expect_identical(dim(res), c(29L, 43L)) + expect_true( + all(c( + "id", "createdAt", "createdBy", "updatedAt", "updatedBy", "deleted", + "deletedAt", "createdOn", "dateOfFirstContact", "contactDate", + "contactDateEstimated", "certaintyLevelId", "exposureTypeId", + "exposureFrequencyId", "exposureDurationId", "socialRelationshipTypeId", + "socialRelationshipDetail", "clusterId", "comment", "sourcePerson.type", + "sourcePerson.firstName", "sourcePerson.gender", "sourcePerson.visualId", + "sourcePerson.lastName", "sourcePerson.dob", "sourcePerson.id", + "sourcePerson.source", "sourcePerson.name", "sourcePerson.middleName", + "sourcePerson.age.years", "sourcePerson.age.months", "targetPerson.type", + "targetPerson.firstName", "targetPerson.gender", "targetPerson.visualId", + "targetPerson.lastName", "targetPerson.id", "targetPerson.target", + "targetPerson.name", "targetPerson.middleName", "targetPerson.dob", + "targetPerson.age.years", "targetPerson.age.months" + ) %in% colnames(res)) + ) + + + expect_identical( + unname(sapply(res[1, ], class)), + c("character", "character", "character", "character", "character", + "logical", "logical", "character", "character", "character", "logical", + "character", "character", "character", "character", "character", + "character", "character", "character", "character", "character", + "character", "character", "character", "logical", "character", "logical", + "logical", "character", "integer", "integer", "character", "character", + "character", "character", "character", "character", "logical", + "character", "character", "character", "integer", "integer")) +}) From a0da8ea751f99ad722b315d832806972f5bece58 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Tue, 14 Feb 2023 09:55:52 +0000 Subject: [PATCH 112/203] linted get_followups --- R/get_followups.R | 134 +++++++++++++++++++++++++++---------------- man/get_followups.Rd | 44 ++++++++------ 2 files changed, 113 insertions(+), 65 deletions(-) diff --git a/R/get_followups.R b/R/get_followups.R index 731a860..5ab10b4 100644 --- a/R/get_followups.R +++ b/R/get_followups.R @@ -6,33 +6,43 @@ #' This function works on all versions of Go.Data. There #' are two methods for downloading the data: #' -#' `method="batches"` will work on all versions of +#' `method = "batches"` will work on all versions of #' Go.Data. This method relies on the GET outbreak/{id}/follow-ups #' API endpoint. Records are then retrieved in batches #' based on `batch_size` and appended together into -#' a final dataset. `method="batches"` will be the default and +#' a final dataset. `method = "batches"` will be the default and #' only available method for Go.Data version 2.38.0 or older. #' -#' `method="export"` will only work on Go.Data versions +#' `method = "export"` will only work on Go.Data versions #' 2.38.1 or newer. This method relies on the GET #' outbreak/{id}/follow-ups/export API endpoint. An export #' request is submitted to the server, and then when the #' export is ready, it will be downloaded. Due to better -#' performance and more options, `method="export"` will +#' performance and more options, `method = "export"` will #' be the default if you are using Go.Data version 2.38.1 #' or newer. #' -#' @param url Insert the base URL for your instance of Go.Data here. Don't forget the forward slash "/" at end! +#' @param url Insert the base URL for your instance of Go.Data here. Don't +#' forget the forward slash "/" at end! #' @param username The email address for your Go.Data login. #' @param password The password for your Go.Data login -#' @param outbreak_id The id number for the outbreak for which you want to download data. -#' @param method The method to download data. `method="export"` is the preferred and default method for Go.Data version 2.38.1 or later. See Details. -#' @param batch_size If `method="batches"`, then `batch_size` specifies the number of records to retrieve in each iteration. -#' @param wait If `method="export"`, then `wait` is the number of seconds to wait in between iterations of checking the status of the export. -#' @param file.type If `method="export"`, then `file.type` determines Whether the resulting data frame should contain nested fields (`file.type="json"`, the default) or an entirely flat data structure (`file.type="csv"`) +#' @param outbreak_id The id number for the outbreak for which you want to +#' download data. +#' @param method The method to download data. `method = "export"` is the +#' preferred and default method for Go.Data version 2.38.1 or later. See +#' Details. +#' @param batch_size If `method = "batches"`, then `batch_size` specifies the +#' number of records to retrieve in each iteration. +#' @param wait If `method = "export"`, then `wait` is the number of seconds to +#' wait in between iterations of checking the status of the export. +#' @param file_type If `method = "export"`, then `file_type` determines Whether +#' the resulting data frame should contain nested fields (`file_type = "json"`, +#' the default) or an entirely flat data structure (`file_type = "csv"`) #' #' @return -#' Returns a data frame. Some fields, such as addresses, hospitalization history, and questionnaire fields may require further unnesting. See \code{\link[tidyr]{nest}} for assitance with unnesting. +#' Returns a data frame. Some fields, such as addresses, hospitalization +#' history, and questionnaire fields may require further unnesting. See +#' \code{\link[tidyr]{nest}} for assitance with unnesting. #' @export #' #' @examples @@ -42,10 +52,12 @@ #' password <- "mypassword" #' outbreak_id <- "3b5554d7-2c19-41d0-b9af-475ad25a382b" #' -#' followups <- get_followups(url=url, -#' username=username, -#' password=password, -#' outbreak_id=outbreak_id) +#' followups <- get_followups( +#' url = url, +#' username = username, +#' password = password, +#' outbreak_id = outbreak_id +#' ) #' } #' @importFrom magrittr %>% #' @import dplyr @@ -54,25 +66,33 @@ #' @import tibble #' @importFrom jsonlite fromJSON #' @importFrom purrr pluck - - -get_followups <- function(url=url, - username=username, - password=password, - outbreak_id=outbreak_id, - method=c("export","batch"), - batch_size=50000, - wait=2, - file.type=c("json","csv")) { +get_followups <- function(url = url, + username = username, + password = password, + outbreak_id = outbreak_id, + method = c("export", "batch"), + batch_size = 50000, + wait = 2, + file_type = c("json", "csv")) { #Check that outbreak_id is active - if (outbreak_id != get_active_outbreak(url=url, username=username, password=password)) { - set_active_outbreak(url=url, username=username, password=password, outbreak_id=outbreak_id) + active_outbreak <- get_active_outbreak( + url = url, + username = username, + password = password + ) + if (outbreak_id != active_outbreak) { + set_active_outbreak( + url = url, + username = username, + password = password, + outbreak_id = outbreak_id + ) } #Set default method based on current version of Go.Data - version.check <- check_godata_version(url=url) - if (!version.check) { + version_check <- check_godata_version(url = url) + if (!version_check) { method <- "batches" #Older version of Go.Data can only use the batch method } else if (missing(method)) { method <- "export" # For new versions of Go.Data, default to export method @@ -81,31 +101,47 @@ get_followups <- function(url=url, if (method == "batches") { - api_call_n <- paste0(url, "api/outbreaks/",outbreak_id,"/follow-ups/filtered-count") - api_call_get <- paste0(url, "api/outbreaks/",outbreak_id,"/follow-ups") - df <- batch_downloader(url=url, - username=username, - password=password, - api_call_n=api_call_n, - api_call_get=api_call_get, - batch_size=batch_size) + api_call_n <- paste0( + url, + "api/outbreaks/", + outbreak_id, + "/follow-ups/filtered-count" + ) + api_call_get <- paste0( + url, + "api/outbreaks/", + outbreak_id, + "/follow-ups" + ) + df <- batch_downloader( + url = url, + username = username, + password = password, + api_call_n = api_call_n, + api_call_get = api_call_get, + batch_size = batch_size + ) } else if (method == "export") { - #Default value of file.type is "json" - if (missing(file.type)) file.type <- "json" + #Default value of file_type is "json" + if (missing(file_type)) file_type <- "json" #Submit an export request to the system - api_call_request <- paste0(url,"api/outbreaks/",outbreak_id,"/follow-ups/export") - df <- export_downloader(url=url, - username=username, - password=password, - api_call_request=api_call_request, - file.type = file.type, - wait = wait) - - + api_call_request <- paste0( + url, + "api/outbreaks/", + outbreak_id, + "/follow-ups/export" + ) + df <- export_downloader( + url = url, + username = username, + password = password, + api_call_request = api_call_request, + file_type = file_type, + wait = wait + ) } - return(df) } diff --git a/man/get_followups.Rd b/man/get_followups.Rd index 82b6fcc..82e92e9 100644 --- a/man/get_followups.Rd +++ b/man/get_followups.Rd @@ -12,28 +12,38 @@ get_followups( method = c("export", "batch"), batch_size = 50000, wait = 2, - file.type = c("json", "csv") + file_type = c("json", "csv") ) } \arguments{ -\item{url}{Insert the base URL for your instance of Go.Data here. Don't forget the forward slash "/" at end!} +\item{url}{Insert the base URL for your instance of Go.Data here. Don't +forget the forward slash "/" at end!} \item{username}{The email address for your Go.Data login.} \item{password}{The password for your Go.Data login} -\item{outbreak_id}{The id number for the outbreak for which you want to download data.} +\item{outbreak_id}{The id number for the outbreak for which you want to +download data.} -\item{method}{The method to download data. \code{method="export"} is the preferred and default method for Go.Data version 2.38.1 or later. See Details.} +\item{method}{The method to download data. \code{method = "export"} is the +preferred and default method for Go.Data version 2.38.1 or later. See +Details.} -\item{batch_size}{If \code{method="batches"}, then \code{batch_size} specifies the number of records to retrieve in each iteration.} +\item{batch_size}{If \code{method = "batches"}, then \code{batch_size} specifies the +number of records to retrieve in each iteration.} -\item{wait}{If \code{method="export"}, then \code{wait} is the number of seconds to wait in between iterations of checking the status of the export.} +\item{wait}{If \code{method = "export"}, then \code{wait} is the number of seconds to +wait in between iterations of checking the status of the export.} -\item{file.type}{If \code{method="export"}, then \code{file.type} determines Whether the resulting data frame should contain nested fields (\code{file.type="json"}, the default) or an entirely flat data structure (\code{file.type="csv"})} +\item{file_type}{If \code{method = "export"}, then \code{file_type} determines Whether +the resulting data frame should contain nested fields (\code{file_type = "json"}, +the default) or an entirely flat data structure (\code{file_type = "csv"})} } \value{ -Returns a data frame. Some fields, such as addresses, hospitalization history, and questionnaire fields may require further unnesting. See \code{\link[tidyr]{nest}} for assitance with unnesting. +Returns a data frame. Some fields, such as addresses, hospitalization +history, and questionnaire fields may require further unnesting. See +\code{\link[tidyr]{nest}} for assitance with unnesting. } \description{ A function to retrieve the follow-up data for a @@ -43,19 +53,19 @@ specific \code{outbreak_id}. This function works on all versions of Go.Data. There are two methods for downloading the data: -\code{method="batches"} will work on all versions of +\code{method = "batches"} will work on all versions of Go.Data. This method relies on the GET outbreak/{id}/follow-ups API endpoint. Records are then retrieved in batches based on \code{batch_size} and appended together into -a final dataset. \code{method="batches"} will be the default and +a final dataset. \code{method = "batches"} will be the default and only available method for Go.Data version 2.38.0 or older. -\code{method="export"} will only work on Go.Data versions +\code{method = "export"} will only work on Go.Data versions 2.38.1 or newer. This method relies on the GET outbreak/{id}/follow-ups/export API endpoint. An export request is submitted to the server, and then when the export is ready, it will be downloaded. Due to better -performance and more options, \code{method="export"} will +performance and more options, \code{method = "export"} will be the default if you are using Go.Data version 2.38.1 or newer. } @@ -66,9 +76,11 @@ username <- "myemail@email.com" password <- "mypassword" outbreak_id <- "3b5554d7-2c19-41d0-b9af-475ad25a382b" -followups <- get_followups(url=url, - username=username, - password=password, - outbreak_id=outbreak_id) +followups <- get_followups( + url = url, + username = username, + password = password, + outbreak_id = outbreak_id +) } } From c78f04e4f7b341a97eded2f0e13fb39364aa98f7 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Tue, 14 Feb 2023 09:56:36 +0000 Subject: [PATCH 113/203] removed recursive argument defaults for get_followups --- R/get_followups.R | 8 ++++---- man/get_followups.Rd | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/R/get_followups.R b/R/get_followups.R index 5ab10b4..5f59860 100644 --- a/R/get_followups.R +++ b/R/get_followups.R @@ -66,10 +66,10 @@ #' @import tibble #' @importFrom jsonlite fromJSON #' @importFrom purrr pluck -get_followups <- function(url = url, - username = username, - password = password, - outbreak_id = outbreak_id, +get_followups <- function(url, + username, + password, + outbreak_id, method = c("export", "batch"), batch_size = 50000, wait = 2, diff --git a/man/get_followups.Rd b/man/get_followups.Rd index 82e92e9..75fd1b8 100644 --- a/man/get_followups.Rd +++ b/man/get_followups.Rd @@ -5,10 +5,10 @@ \title{Download contact follow-ups from Go.Data} \usage{ get_followups( - url = url, - username = username, - password = password, - outbreak_id = outbreak_id, + url, + username, + password, + outbreak_id, method = c("export", "batch"), batch_size = 50000, wait = 2, From 9fb10413cf8a88f6626fdd8d9585672b48d55335 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Tue, 14 Feb 2023 09:57:29 +0000 Subject: [PATCH 114/203] used explicit namespace instead of import for get_followups --- R/get_followups.R | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/R/get_followups.R b/R/get_followups.R index 5f59860..6eccdb3 100644 --- a/R/get_followups.R +++ b/R/get_followups.R @@ -59,13 +59,6 @@ #' outbreak_id = outbreak_id #' ) #' } -#' @importFrom magrittr %>% -#' @import dplyr -#' @import tidyr -#' @import httr -#' @import tibble -#' @importFrom jsonlite fromJSON -#' @importFrom purrr pluck get_followups <- function(url, username, password, @@ -143,5 +136,5 @@ get_followups <- function(url, wait = wait ) } - return(df) + return(tibble::as_tibble(df)) } From da9ca87205dc6295d45a5d0bbfc8cef9ea1feca4 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Tue, 14 Feb 2023 10:11:42 +0000 Subject: [PATCH 115/203] added test for get_followups --- tests/testthat/test-get_followups.R | 48 +++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 tests/testthat/test-get_followups.R diff --git a/tests/testthat/test-get_followups.R b/tests/testthat/test-get_followups.R new file mode 100644 index 0000000..345df53 --- /dev/null +++ b/tests/testthat/test-get_followups.R @@ -0,0 +1,48 @@ +test_that("get_followups works as expected", { + skip("get_followups requires API call") + + res <- get_followups( + url = url, + username = username, + password = password, + outbreak_id = outbreak_id, + method = "export", + batch_size = 50000, + wait = 2, + file_type = "json" + ) + + expect_s3_class(res, "tbl_df") + expect_s3_class(res, "data.frame") + expect_identical(dim(res), c(49L, 201L)) + expect_true( + all(c( + "id", "date", "index", "createdAt", "createdBy", "updatedAt", "updatedBy", + "deleted", "deletedAt", "createdOn", "fillLocation", "teamId", "statusId", + "targeted", "comment", "responsibleUserId", "contact.firstName", + "contact.lastName", "contact.visualId", "contact.id", "address.typeId", + "address.locationId", "address.Identifiers", + "address.Location geographical level", "address.Parent location", + "address.geoLocationAccurate", "address.date", "address.country", + "address.city", "address.addressLine1", "address.postalCode", + "address.phoneNumber", "address.emailAddress", "address.geoLocation.lat", + "address.geoLocation.lng" + ) %in% colnames(res)) + ) + + expect_true( + all(grepl(pattern = "^questionnaireAnswers", x = colnames(res)[36:201])) + ) + + + expect_identical( + unname(sapply(res[1, ], class)), + c("character", "character", "integer", "character", "character", + "character", "character", "logical", "logical", "character", "logical", + "character", "character", "logical", "logical", "character", "character", + "character", "character", "character", "character", "character", "list", + "list", "list", "logical", "character", "logical", "logical", "logical", + "logical", "logical", "logical", "numeric", "numeric", "list", "list", + "list", "list", "list", "list", "list", "logical", "list", "logical", + "list", "list", rep("logical", 152), "list", "list")) +}) From 8f67af0ea6e25a277f08863460acbf30c702c44b Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Tue, 14 Feb 2023 10:46:20 +0000 Subject: [PATCH 116/203] linted get_events --- R/get_events.R | 258 ++++++++++++++++++++++++++-------------------- man/get_events.Rd | 44 +++++--- 2 files changed, 175 insertions(+), 127 deletions(-) diff --git a/R/get_events.R b/R/get_events.R index 88f474f..2959393 100644 --- a/R/get_events.R +++ b/R/get_events.R @@ -1,111 +1,147 @@ -#' Download events from Go.Data -#' -#' A function to retrieve the event data for a -#' specific `outbreak_id`. -#' -#' This function works on all versions of Go.Data. There -#' are two methods for downloading the data: -#' -#' `method="batches"` will work on all versions of -#' Go.Data. This method relies on the GET outbreak/{id}/events -#' API endpoint. Records are then retrieved in batches -#' based on `batch_size` and appended together into -#' a final dataset. `method="batches"` will be the default and -#' only available method for Go.Data version 2.38.0 or older. -#' -#' `method="export"` will only work on Go.Data versions -#' 2.38.1 or newer. This method relies on the GET -#' outbreak/{id}/events/export API endpoint. An export -#' request is submitted to the server, and then when the -#' export is ready, it will be downloaded. Due to better -#' performance and more options, `method="export"` will -#' be the default if you are using Go.Data version 2.38.1 -#' or newer. -#' -#' @param url Insert the base URL for your instance of Go.Data here. Don't forget the forward slash "/" at end! -#' @param username The email address for your Go.Data login. -#' @param password The password for your Go.Data login -#' @param outbreak_id The id number for the outbreak for which you want to download data. -#' @param method The method to download data. `method="export"` is the preferred and default method for Go.Data version 2.38.1 or later. See Details. -#' @param batch_size If `method="batches"`, then `batch_size` specifies the number of records to retrieve in each iteration. -#' @param wait If `method="export"`, then `wait` is the number of seconds to wait in between iterations of checking the status of the export. -#' @param file.type If `method="export"`, then `file.type` determines Whether the resulting data frame should contain nested fields (`file.type="json"`, the default) or an entirely flat data structure (`file.type="csv"`) -#' -#' @return -#' Returns a data frame. Some fields, such as addresses, hospitalization history, and questionnaire fields may require further unnesting. See \code{\link[tidyr]{nest}} for assitance with unnesting. -#' @export -#' -#' @examples -#' \dontrun{ -#' url <- "https://MyGoDataServer.com/" -#' username <- "myemail@email.com" -#' password <- "mypassword" -#' outbreak_id <- "3b5554d7-2c19-41d0-b9af-475ad25a382b" -#' -#' events <- get_events(url=url, -#' username=username, -#' password=password, -#' outbreak_id=outbreak_id) -#' } -#' @importFrom magrittr %>% -#' @import dplyr -#' @import tidyr -#' @import httr -#' @import tibble -#' @importFrom jsonlite fromJSON -#' @importFrom purrr pluck - - -get_events <- function(url=url, - username=username, - password=password, - outbreak_id=outbreak_id, - method=c("export","batch"), - batch_size=50000, - wait=2, - file.type=c("json","csv")) { - - #Check that outbreak_id is active - if (outbreak_id != get_active_outbreak(url=url, username=username, password=password)) { - set_active_outbreak(url=url, username=username, password=password, outbreak_id=outbreak_id) - } - - #Set default method based on current version of Go.Data - version.check <- check_godata_version(url=url) - if (!version.check) { - method <- "batches" #Older version of Go.Data can only use the batch method - } else if (missing(method)) { - method <- "export" # For new versions of Go.Data, default to export method - } - - - if (method == "batches") { - - api_call_n <- paste0(url, "api/outbreaks/",outbreak_id,"/events/filtered-count") - api_call_get <- paste0(url, "api/outbreaks/",outbreak_id,"/events") - df <- batch_downloader(url=url, - username=username, - password=password, - api_call_n=api_call_n, - api_call_get=api_call_get, - batch_size=batch_size) - - } else if (method == "export") { - - #Default value of file.type is "json" - if (missing(file.type)) file.type <- "json" - - #Submit an export request to the system - api_call_request <- paste0(url,"api/outbreaks/",outbreak_id,"/events/export") - df <- export_downloader(url=url, - username=username, - password=password, - api_call_request=api_call_request, - file.type = file.type, - wait = wait) - - - } - - return(df) -} +#' Download events from Go.Data +#' +#' A function to retrieve the event data for a +#' specific `outbreak_id`. +#' +#' This function works on all versions of Go.Data. There +#' are two methods for downloading the data: +#' +#' `method = "batches"` will work on all versions of +#' Go.Data. This method relies on the GET outbreak/{id}/events +#' API endpoint. Records are then retrieved in batches +#' based on `batch_size` and appended together into +#' a final dataset. `method = "batches"` will be the default and +#' only available method for Go.Data version 2.38.0 or older. +#' +#' `method = "export"` will only work on Go.Data versions +#' 2.38.1 or newer. This method relies on the GET +#' outbreak/{id}/events/export API endpoint. An export +#' request is submitted to the server, and then when the +#' export is ready, it will be downloaded. Due to better +#' performance and more options, `method = "export"` will +#' be the default if you are using Go.Data version 2.38.1 +#' or newer. +#' +#' @param url Insert the base URL for your instance of Go.Data here. Don't +#' forget the forward slash "/" at end! +#' @param username The email address for your Go.Data login. +#' @param password The password for your Go.Data login +#' @param outbreak_id The id number for the outbreak for which you want to +#' download data. +#' @param method The method to download data. `method = "export"` is the +#' preferred and default method for Go.Data version 2.38.1 or later. See +#' Details. +#' @param batch_size If `method = "batches"`, then `batch_size` specifies the +#' number of records to retrieve in each iteration. +#' @param wait If `method = "export"`, then `wait` is the number of seconds to +#' wait in between iterations of checking the status of the export. +#' @param file_type If `method = "export"`, then `file_type` determines Whether +#' the resulting data frame should contain nested fields (`file_type = "json"`, +#' the default) or an entirely flat data structure (`file_type = "csv"`) +#' +#' @return +#' Returns a data frame. Some fields, such as addresses, hospitalization +#' history, and questionnaire fields may require further unnesting. See +#' \code{\link[tidyr]{nest}} for assitance with unnesting. +#' @export +#' +#' @examples +#' \dontrun{ +#' url <- "https://MyGoDataServer.com/" +#' username <- "myemail@email.com" +#' password <- "mypassword" +#' outbreak_id <- "3b5554d7-2c19-41d0-b9af-475ad25a382b" +#' +#' events <- get_events( +#' url = url, +#' username = username, +#' password = password, +#' outbreak_id = outbreak_id +#' ) +#' } +#' @importFrom magrittr %>% +#' @import dplyr +#' @import tidyr +#' @import httr +#' @import tibble +#' @importFrom jsonlite fromJSON +#' @importFrom purrr pluck +get_events <- function(url = url, + username = username, + password = password, + outbreak_id = outbreak_id, + method = c("export", "batch"), + batch_size = 50000, + wait = 2, + file_type = c("json", "csv")) { + + #Check that outbreak_id is active + active_outbreak <- get_active_outbreak( + url = url, + username = username, + password = password + ) + if (outbreak_id != active_outbreak) { + set_active_outbreak( + url = url, + username = username, + password = password, + outbreak_id = outbreak_id + ) + } + + #Set default method based on current version of Go.Data + version_check <- check_godata_version(url = url) + if (!version_check) { + method <- "batches" #Older version of Go.Data can only use the batch method + } else if (missing(method)) { + method <- "export" # For new versions of Go.Data, default to export method + } + + + if (method == "batches") { + + api_call_n <- paste0( + url, + "api/outbreaks/", + outbreak_id, + "/events/filtered-count" + ) + api_call_get <- paste0( + url, + "api/outbreaks/", + outbreak_id, + "/events" + ) + df <- batch_downloader( + url = url, + username = username, + password = password, + api_call_n = api_call_n, + api_call_get = api_call_get, + batch_size = batch_size + ) + + } else if (method == "export") { + + #Default value of file_type is "json" + if (missing(file_type)) file_type <- "json" + + #Submit an export request to the system + api_call_request <- paste0( + url, + "api/outbreaks/", + outbreak_id, + "/events/export" + ) + df <- export_downloader( + url = url, + username = username, + password = password, + api_call_request = api_call_request, + file_type = file_type, + wait = wait + ) + } + return(df) +} diff --git a/man/get_events.Rd b/man/get_events.Rd index 6985f16..ba678f3 100644 --- a/man/get_events.Rd +++ b/man/get_events.Rd @@ -12,28 +12,38 @@ get_events( method = c("export", "batch"), batch_size = 50000, wait = 2, - file.type = c("json", "csv") + file_type = c("json", "csv") ) } \arguments{ -\item{url}{Insert the base URL for your instance of Go.Data here. Don't forget the forward slash "/" at end!} +\item{url}{Insert the base URL for your instance of Go.Data here. Don't +forget the forward slash "/" at end!} \item{username}{The email address for your Go.Data login.} \item{password}{The password for your Go.Data login} -\item{outbreak_id}{The id number for the outbreak for which you want to download data.} +\item{outbreak_id}{The id number for the outbreak for which you want to +download data.} -\item{method}{The method to download data. \code{method="export"} is the preferred and default method for Go.Data version 2.38.1 or later. See Details.} +\item{method}{The method to download data. \code{method = "export"} is the +preferred and default method for Go.Data version 2.38.1 or later. See +Details.} -\item{batch_size}{If \code{method="batches"}, then \code{batch_size} specifies the number of records to retrieve in each iteration.} +\item{batch_size}{If \code{method = "batches"}, then \code{batch_size} specifies the +number of records to retrieve in each iteration.} -\item{wait}{If \code{method="export"}, then \code{wait} is the number of seconds to wait in between iterations of checking the status of the export.} +\item{wait}{If \code{method = "export"}, then \code{wait} is the number of seconds to +wait in between iterations of checking the status of the export.} -\item{file.type}{If \code{method="export"}, then \code{file.type} determines Whether the resulting data frame should contain nested fields (\code{file.type="json"}, the default) or an entirely flat data structure (\code{file.type="csv"})} +\item{file_type}{If \code{method = "export"}, then \code{file_type} determines Whether +the resulting data frame should contain nested fields (\code{file_type = "json"}, +the default) or an entirely flat data structure (\code{file_type = "csv"})} } \value{ -Returns a data frame. Some fields, such as addresses, hospitalization history, and questionnaire fields may require further unnesting. See \code{\link[tidyr]{nest}} for assitance with unnesting. +Returns a data frame. Some fields, such as addresses, hospitalization +history, and questionnaire fields may require further unnesting. See +\code{\link[tidyr]{nest}} for assitance with unnesting. } \description{ A function to retrieve the event data for a @@ -43,19 +53,19 @@ specific \code{outbreak_id}. This function works on all versions of Go.Data. There are two methods for downloading the data: -\code{method="batches"} will work on all versions of +\code{method = "batches"} will work on all versions of Go.Data. This method relies on the GET outbreak/{id}/events API endpoint. Records are then retrieved in batches based on \code{batch_size} and appended together into -a final dataset. \code{method="batches"} will be the default and +a final dataset. \code{method = "batches"} will be the default and only available method for Go.Data version 2.38.0 or older. -\code{method="export"} will only work on Go.Data versions +\code{method = "export"} will only work on Go.Data versions 2.38.1 or newer. This method relies on the GET outbreak/{id}/events/export API endpoint. An export request is submitted to the server, and then when the export is ready, it will be downloaded. Due to better -performance and more options, \code{method="export"} will +performance and more options, \code{method = "export"} will be the default if you are using Go.Data version 2.38.1 or newer. } @@ -66,9 +76,11 @@ username <- "myemail@email.com" password <- "mypassword" outbreak_id <- "3b5554d7-2c19-41d0-b9af-475ad25a382b" -events <- get_events(url=url, - username=username, - password=password, - outbreak_id=outbreak_id) +events <- get_events( + url = url, + username = username, + password = password, + outbreak_id = outbreak_id +) } } From 25fcab92149da9be64638af82972e0101091ea97 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Tue, 14 Feb 2023 10:47:06 +0000 Subject: [PATCH 117/203] removed recurive argument defaults from get_events --- R/get_events.R | 8 ++++---- man/get_events.Rd | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/R/get_events.R b/R/get_events.R index 2959393..608e25a 100644 --- a/R/get_events.R +++ b/R/get_events.R @@ -66,10 +66,10 @@ #' @import tibble #' @importFrom jsonlite fromJSON #' @importFrom purrr pluck -get_events <- function(url = url, - username = username, - password = password, - outbreak_id = outbreak_id, +get_events <- function(url, + username, + password, + outbreak_id, method = c("export", "batch"), batch_size = 50000, wait = 2, diff --git a/man/get_events.Rd b/man/get_events.Rd index ba678f3..f7ad66f 100644 --- a/man/get_events.Rd +++ b/man/get_events.Rd @@ -5,10 +5,10 @@ \title{Download events from Go.Data} \usage{ get_events( - url = url, - username = username, - password = password, - outbreak_id = outbreak_id, + url, + username, + password, + outbreak_id, method = c("export", "batch"), batch_size = 50000, wait = 2, From 6736ff184b5c9594e80eadae3f573a26945e2cd9 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Tue, 14 Feb 2023 10:48:53 +0000 Subject: [PATCH 118/203] used explicit namespace instead of import for get_events --- R/get_events.R | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/R/get_events.R b/R/get_events.R index 608e25a..cbb2a29 100644 --- a/R/get_events.R +++ b/R/get_events.R @@ -59,13 +59,6 @@ #' outbreak_id = outbreak_id #' ) #' } -#' @importFrom magrittr %>% -#' @import dplyr -#' @import tidyr -#' @import httr -#' @import tibble -#' @importFrom jsonlite fromJSON -#' @importFrom purrr pluck get_events <- function(url, username, password, @@ -143,5 +136,5 @@ get_events <- function(url, wait = wait ) } - return(df) + return(tibble::as_tibble(df)) } From e54a4b32598153155f4fb9b68d024b6cc265d77d Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Tue, 14 Feb 2023 10:59:15 +0000 Subject: [PATCH 119/203] added test for get_events --- tests/testthat/test-get_events.R | 43 ++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 tests/testthat/test-get_events.R diff --git a/tests/testthat/test-get_events.R b/tests/testthat/test-get_events.R new file mode 100644 index 0000000..b66d8ee --- /dev/null +++ b/tests/testthat/test-get_events.R @@ -0,0 +1,43 @@ +test_that("get_events works as expected", { + skip("get_events requires API call") + + res <- get_events( + url = url, + username = username, + password = password, + outbreak_id = outbreak_id, + method = "export", + batch_size = 50000, + wait = 2, + file_type = "json" + ) + + expect_s3_class(res, "tbl_df") + expect_s3_class(res, "data.frame") + expect_identical(dim(res), c(2L, 34L)) + expect_true( + all(c( + "id", "dateOfReporting", "isDateOfReportingApproximate", "createdAt", + "createdBy", "updatedAt", "updatedBy", "deleted", "deletedAt", + "createdOn", "type", "numberOfExposures", "numberOfContacts", "name", + "date", "description", "responsibleUserId", "eventCategory", "endDate", + "address.typeId", "address.locationId", "address.Identifiers", + "address.Location geographical level", "address.Parent location", + "address.geoLocationAccurate", "address.date", "address.country", + "address.city", "address.addressLine1", "address.postalCode", + "address.phoneNumber", "address.emailAddress", "address.geoLocation.lat", + "address.geoLocation.lng" + ) %in% colnames(res)) + ) + + + expect_identical( + unname(sapply(res[1, ], class)), + c("character", "character", "logical", "character", "character", + "character", "character", "logical", "logical", "character", "character", + "integer", "integer", "character", "character", "logical", "logical", + "logical", "character", "character", "character", "list", "list", + "list", "logical", "character", "logical", "logical", "logical", + "logical", "logical", "logical", "numeric", "numeric")) + +}) From 810789b1584b5879734720094aa457f97d306adb Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Tue, 14 Feb 2023 11:05:19 +0000 Subject: [PATCH 120/203] linted get_teams --- R/get_teams.R | 31 ++++++++++++++++++++----------- man/get_teams.Rd | 3 ++- 2 files changed, 22 insertions(+), 12 deletions(-) diff --git a/R/get_teams.R b/R/get_teams.R index d6b799e..dbb5279 100644 --- a/R/get_teams.R +++ b/R/get_teams.R @@ -6,7 +6,8 @@ #' endpoint). This function relies on the #' `\teams` API endpoint. #' -#' @param url Insert the base URL for your instance of Go.Data here. Don't forget the forward slash "/" at end! +#' @param url Insert the base URL for your instance of Go.Data here. Don't +#' forget the forward slash "/" at end! #' @param username The email address for your Go.Data login. #' @param password The password for your Go.Data login #' @@ -30,17 +31,25 @@ #' @importFrom jsonlite fromJSON #' @importFrom purrr pluck #' @export +get_teams <- function(url = url, + username = username, + password = password) { - -get_teams <- function(url=url, - username=username, - password=password) { - - teams <- GET(paste0(url,"api/teams", - "?access_token=",get_access_token(url=url, username=username, password=password))) %>% - content(as="text") %>% - fromJSON(flatten=TRUE) %>% - filter(deleted!=TRUE) + teams <- GET( + paste0( + url, + "api/teams", + "?access_token=", + get_access_token( + url = url, + username = username, + password = password + ) + ) + ) %>% + content(as = "text") %>% + fromJSON(flatten = TRUE) %>% + filter(.data$deleted != TRUE) return(teams) diff --git a/man/get_teams.Rd b/man/get_teams.Rd index add05f6..36993cc 100644 --- a/man/get_teams.Rd +++ b/man/get_teams.Rd @@ -7,7 +7,8 @@ get_teams(url = url, username = username, password = password) } \arguments{ -\item{url}{Insert the base URL for your instance of Go.Data here. Don't forget the forward slash "/" at end!} +\item{url}{Insert the base URL for your instance of Go.Data here. Don't +forget the forward slash "/" at end!} \item{username}{The email address for your Go.Data login.} From 7b83f3636e08bfb6e95b5fdfc1d5e42851c2ee6a Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Tue, 14 Feb 2023 11:06:44 +0000 Subject: [PATCH 121/203] removed recursive default arguments in get_teams --- R/get_teams.R | 6 +++--- man/get_teams.Rd | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/R/get_teams.R b/R/get_teams.R index dbb5279..43aaa16 100644 --- a/R/get_teams.R +++ b/R/get_teams.R @@ -31,9 +31,9 @@ #' @importFrom jsonlite fromJSON #' @importFrom purrr pluck #' @export -get_teams <- function(url = url, - username = username, - password = password) { +get_teams <- function(url, + username, + password) { teams <- GET( paste0( diff --git a/man/get_teams.Rd b/man/get_teams.Rd index 36993cc..f6436d5 100644 --- a/man/get_teams.Rd +++ b/man/get_teams.Rd @@ -4,7 +4,7 @@ \alias{get_teams} \title{#' Get teams data from Go.Data} \usage{ -get_teams(url = url, username = username, password = password) +get_teams(url, username, password) } \arguments{ \item{url}{Insert the base URL for your instance of Go.Data here. Don't From d5f51dd113ec5252757b7c5ddd88ce0562a068f6 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Tue, 14 Feb 2023 11:09:23 +0000 Subject: [PATCH 122/203] removed pipe from get_teams --- R/get_teams.R | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/R/get_teams.R b/R/get_teams.R index 43aaa16..71d3436 100644 --- a/R/get_teams.R +++ b/R/get_teams.R @@ -35,7 +35,7 @@ get_teams <- function(url, username, password) { - teams <- GET( + teams_request <- GET( paste0( url, "api/teams", @@ -46,11 +46,10 @@ get_teams <- function(url, password = password ) ) - ) %>% - content(as = "text") %>% - fromJSON(flatten = TRUE) %>% - filter(.data$deleted != TRUE) + ) + teams_content <- content(teams_request, as = "text") + teams <- fromJSON(teams_content, flatten = TRUE) + teams <- filter(teams, .data$deleted != TRUE) return(teams) - } From 4c41361f3795132cf117c86cb18cc59bd727252e Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Tue, 14 Feb 2023 11:10:49 +0000 Subject: [PATCH 123/203] used explicit namespace instead of import in get_teams --- R/get_teams.R | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/R/get_teams.R b/R/get_teams.R index 71d3436..0c52325 100644 --- a/R/get_teams.R +++ b/R/get_teams.R @@ -24,18 +24,12 @@ #' username=username, #' password=password) #' } -#' @importFrom magrittr %>% -#' @import dplyr -#' @import tidyr -#' @import httr -#' @importFrom jsonlite fromJSON -#' @importFrom purrr pluck #' @export get_teams <- function(url, username, password) { - teams_request <- GET( + teams_request <- httr::GET( paste0( url, "api/teams", @@ -47,9 +41,9 @@ get_teams <- function(url, ) ) ) - teams_content <- content(teams_request, as = "text") - teams <- fromJSON(teams_content, flatten = TRUE) - teams <- filter(teams, .data$deleted != TRUE) + teams_content <- httr::content(teams_request, as = "text") + teams <- jsonlite::fromJSON(teams_content, flatten = TRUE) + teams <- dplyr::filter(teams, .data$deleted != TRUE) return(teams) } From 8addbebefa2fb5a6ca5ab457f6684fe63ad7b7c1 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Tue, 14 Feb 2023 11:12:01 +0000 Subject: [PATCH 124/203] ensure get_teams returns a tibble --- R/get_teams.R | 9 ++++++--- man/get_teams.Rd | 8 +++++--- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/R/get_teams.R b/R/get_teams.R index 0c52325..72ffd34 100644 --- a/R/get_teams.R +++ b/R/get_teams.R @@ -20,9 +20,11 @@ #' username <- "myemail@email.com" #' password <- "mypassword" #' -#' teams <- get_teams(url=url, -#' username=username, -#' password=password) +#' teams <- get_teams( +#' url = url, +#' username = username, +#' password = password +#' ) #' } #' @export get_teams <- function(url, @@ -44,6 +46,7 @@ get_teams <- function(url, teams_content <- httr::content(teams_request, as = "text") teams <- jsonlite::fromJSON(teams_content, flatten = TRUE) teams <- dplyr::filter(teams, .data$deleted != TRUE) + teams <- tibble::as_tibble(teams) return(teams) } diff --git a/man/get_teams.Rd b/man/get_teams.Rd index f6436d5..2879e2d 100644 --- a/man/get_teams.Rd +++ b/man/get_teams.Rd @@ -30,8 +30,10 @@ url <- "https://MyGoDataServer.com/" username <- "myemail@email.com" password <- "mypassword" -teams <- get_teams(url=url, - username=username, - password=password) +teams <- get_teams( + url = url, + username = username, + password = password +) } } From 1b26a090bea5a883aa3e8734c9dc0c3403b42bed Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Tue, 14 Feb 2023 11:15:25 +0000 Subject: [PATCH 125/203] added test for get_teams --- tests/testthat/test-get_teams.R | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 tests/testthat/test-get_teams.R diff --git a/tests/testthat/test-get_teams.R b/tests/testthat/test-get_teams.R new file mode 100644 index 0000000..efc960b --- /dev/null +++ b/tests/testthat/test-get_teams.R @@ -0,0 +1,22 @@ +test_that("get_teams works as expected", { + skip("get_teams requires API call") + + + res <- get_teams(url = url, username = username, password = password) + + expect_s3_class(res, "tbl_df") + expect_s3_class(res, "data.frame") + expect_identical(dim(res), c(3L, 10L)) + expect_true( + all(c( + "name", "userIds", "locationIds", "id", "createdAt", "createdBy", + "updatedAt", "updatedBy", "createdOn", "deleted" + ) %in% colnames(res)) + ) + + + expect_identical( + unname(sapply(res[1, ], class)), + c("character", "list", "list", "character", "character", "character", + "character", "character", "character", "logical")) +}) From d6167e563c4db5100bca166b37583e011ff22d87 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Tue, 14 Feb 2023 11:15:56 +0000 Subject: [PATCH 126/203] updated NAMESPACE --- NAMESPACE | 1 - 1 file changed, 1 deletion(-) diff --git a/NAMESPACE b/NAMESPACE index 4ce71b1..2120a66 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -37,7 +37,6 @@ import(jsonlite) import(lubridate) import(purrr) import(stringr) -import(tibble) import(tidyr) import(urltools) importFrom(jsonlite,fromJSON) From b6b4a79b6659bc8186e1fcd9451e93f1a477c32b Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Tue, 14 Feb 2023 11:20:24 +0000 Subject: [PATCH 127/203] linted get_locations --- R/get_locations.R | 37 +++++++++++++++++++++++++------------ man/get_locations.Rd | 11 +++++++---- 2 files changed, 32 insertions(+), 16 deletions(-) diff --git a/R/get_locations.R b/R/get_locations.R index 1489ba7..c16ee7b 100644 --- a/R/get_locations.R +++ b/R/get_locations.R @@ -6,7 +6,8 @@ #' endpoint). This function relies on the `\locations` #' API endpoint. #' -#' @param url Insert the base URL for your instance of Go.Data here. Don't forget the forward slash "/" at end! +#' @param url Insert the base URL for your instance of Go.Data here. Don't +#' forget the forward slash "/" at end! #' @param username The email address for your Go.Data login. #' @param password The password for your Go.Data login #' @@ -19,9 +20,11 @@ #' username <- "myemail@email.com" #' password <- "mypassword" #' -#' locations <- get_locations(url=url, -#' username=username, -#' password=password) +#' locations <- get_locations( +#' url = url, +#' username = username, +#' password = password +#' ) #' } #' @importFrom magrittr %>% #' @import dplyr @@ -32,15 +35,25 @@ #' @export -get_locations <- function(url=url, - username=username, - password=password) { +get_locations <- function(url = url, + username = username, + password = password) { - locations <- GET(paste0(url,"api/locations", - "?access_token=",godataR::get_access_token(url=url, username=username, password=password))) %>% - content(as="text") %>% - fromJSON(flatten=TRUE) %>% - filter(deleted!=TRUE) + locations <- GET( + paste0( + url, + "api/locations", + "?access_token=", + godataR::get_access_token( + url = url, + username = username, + password = password + ) + ) + ) %>% + content(as = "text") %>% + fromJSON(flatten = TRUE) %>% + filter(.data$deleted != TRUE) return(locations) diff --git a/man/get_locations.Rd b/man/get_locations.Rd index 67cb4db..e035f4f 100644 --- a/man/get_locations.Rd +++ b/man/get_locations.Rd @@ -7,7 +7,8 @@ get_locations(url = url, username = username, password = password) } \arguments{ -\item{url}{Insert the base URL for your instance of Go.Data here. Don't forget the forward slash "/" at end!} +\item{url}{Insert the base URL for your instance of Go.Data here. Don't +forget the forward slash "/" at end!} \item{username}{The email address for your Go.Data login.} @@ -29,8 +30,10 @@ url <- "https://MyGoDataServer.com/" username <- "myemail@email.com" password <- "mypassword" -locations <- get_locations(url=url, - username=username, - password=password) +locations <- get_locations( + url = url, + username = username, + password = password +) } } From ee83866667bda44f5cbf8fb45789a70729cd0e5e Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Tue, 14 Feb 2023 11:21:07 +0000 Subject: [PATCH 128/203] removed recursive arugment defaults for get_locations --- R/get_locations.R | 6 +++--- man/get_locations.Rd | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/R/get_locations.R b/R/get_locations.R index c16ee7b..577f90b 100644 --- a/R/get_locations.R +++ b/R/get_locations.R @@ -35,9 +35,9 @@ #' @export -get_locations <- function(url = url, - username = username, - password = password) { +get_locations <- function(url, + username, + password) { locations <- GET( paste0( diff --git a/man/get_locations.Rd b/man/get_locations.Rd index e035f4f..77b1f98 100644 --- a/man/get_locations.Rd +++ b/man/get_locations.Rd @@ -4,7 +4,7 @@ \alias{get_locations} \title{Get location data from Go.Data} \usage{ -get_locations(url = url, username = username, password = password) +get_locations(url, username, password) } \arguments{ \item{url}{Insert the base URL for your instance of Go.Data here. Don't From 113a45fcbd8a0d046a88b88ce3c0c1a10fe7cc4b Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Tue, 14 Feb 2023 11:23:02 +0000 Subject: [PATCH 129/203] removed pipe from get_locations --- R/get_locations.R | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/R/get_locations.R b/R/get_locations.R index 577f90b..4db6005 100644 --- a/R/get_locations.R +++ b/R/get_locations.R @@ -33,13 +33,11 @@ #' @importFrom jsonlite fromJSON #' @importFrom purrr pluck #' @export - - get_locations <- function(url, username, password) { - locations <- GET( + locations_request <- GET( paste0( url, "api/locations", @@ -50,11 +48,10 @@ get_locations <- function(url, password = password ) ) - ) %>% - content(as = "text") %>% - fromJSON(flatten = TRUE) %>% - filter(.data$deleted != TRUE) + ) + locations_content <- content(locations_request, as = "text") + locations <- fromJSON(locations_content, flatten = TRUE) + locations <- filter(locations, .data$deleted != TRUE) return(locations) - } From e26c7ec02cb0f84e5d1c6da040c2f743e9f180d7 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Tue, 14 Feb 2023 11:24:06 +0000 Subject: [PATCH 130/203] used explicit namespace instead of import in get_locations --- R/get_locations.R | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/R/get_locations.R b/R/get_locations.R index 4db6005..9aa8c52 100644 --- a/R/get_locations.R +++ b/R/get_locations.R @@ -26,18 +26,12 @@ #' password = password #' ) #' } -#' @importFrom magrittr %>% -#' @import dplyr -#' @import tidyr -#' @import httr -#' @importFrom jsonlite fromJSON -#' @importFrom purrr pluck #' @export get_locations <- function(url, username, password) { - locations_request <- GET( + locations_request <- httr::GET( paste0( url, "api/locations", @@ -49,9 +43,9 @@ get_locations <- function(url, ) ) ) - locations_content <- content(locations_request, as = "text") - locations <- fromJSON(locations_content, flatten = TRUE) - locations <- filter(locations, .data$deleted != TRUE) + locations_content <- httr::content(locations_request, as = "text") + locations <- jsonlite::fromJSON(locations_content, flatten = TRUE) + locations <- dplyr::filter(locations, .data$deleted != TRUE) return(locations) } From c0a6a8cd233b9862091cb1d162755319d2bcc0ca Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Tue, 14 Feb 2023 11:24:56 +0000 Subject: [PATCH 131/203] ensure get_locations returns a tibble --- R/get_locations.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/get_locations.R b/R/get_locations.R index 9aa8c52..decf897 100644 --- a/R/get_locations.R +++ b/R/get_locations.R @@ -46,6 +46,7 @@ get_locations <- function(url, locations_content <- httr::content(locations_request, as = "text") locations <- jsonlite::fromJSON(locations_content, flatten = TRUE) locations <- dplyr::filter(locations, .data$deleted != TRUE) + locations <- tibble::as_tibble(locations) return(locations) } From 2dd80e32f7026ced32e21c24ba62ba1df7dfe111 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Tue, 14 Feb 2023 11:28:53 +0000 Subject: [PATCH 132/203] added test for get_locations --- tests/testthat/test-get_locations.R | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 tests/testthat/test-get_locations.R diff --git a/tests/testthat/test-get_locations.R b/tests/testthat/test-get_locations.R new file mode 100644 index 0000000..fc610d0 --- /dev/null +++ b/tests/testthat/test-get_locations.R @@ -0,0 +1,24 @@ +test_that("get_locations works as expected", { + skip("get_locations requires API call") + + res <- get_locations(url = url, username = username, password = password) + + expect_s3_class(res, "tbl_df") + expect_s3_class(res, "data.frame") + expect_identical(dim(res), c(11L, 16L)) + expect_true( + all(c( + "name", "synonyms", "identifiers", "active", "populationDensity", + "parentLocationId", "geographicalLevelId", "id", "createdAt", + "createdBy", "updatedAt", "updatedBy", "createdOn", "deleted", + "geoLocation.lat", "geoLocation.lng" + ) %in% colnames(res)) + ) + + + expect_identical( + unname(sapply(res[1, ], class)), + c("character", "list", "list", "logical", "integer", "character", + "character", "character", "character", "character", "character", + "character", "character", "logical", "numeric", "numeric")) +}) From b7f722dfca344e98de281220fc7c4a5837a5ca08 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Tue, 14 Feb 2023 11:33:17 +0000 Subject: [PATCH 133/203] linted get_reference_data --- R/get_reference_data.R | 40 ++++++++++++++++++++++++--------------- man/get_reference_data.Rd | 11 +++++++---- 2 files changed, 32 insertions(+), 19 deletions(-) diff --git a/R/get_reference_data.R b/R/get_reference_data.R index 5e2f0b7..b56b288 100644 --- a/R/get_reference_data.R +++ b/R/get_reference_data.R @@ -6,7 +6,8 @@ #' endpoint). This function relies on the #' `\reference-data` API endpoint. #' -#' @param url Insert the base URL for your instance of Go.Data here. Don't forget the forward slash "/" at end! +#' @param url Insert the base URL for your instance of Go.Data here. Don't +#' forget the forward slash "/" at end! #' @param username The email address for your Go.Data login. #' @param password The password for your Go.Data login #' @@ -19,9 +20,11 @@ #' username <- "myemail@email.com" #' password <- "mypassword" #' -#' reference_data <- get_reference_data(url=url, -#' username=username, -#' password=password) +#' reference_data <- get_reference_data( +#' url = url, +#' username = username, +#' password = password +#' ) #' } #' @importFrom magrittr %>% #' @import dplyr @@ -30,18 +33,25 @@ #' @importFrom jsonlite fromJSON #' @importFrom purrr pluck #' @export +get_reference_data <- function(url = url, + username = username, + password = password) { - -get_reference_data <- function(url=url, - username=username, - password=password) { - - reference_data <- GET(paste0(url,"api/reference-data", - "?access_token=",get_access_token(url=url, username=username, password=password))) %>% - content(as="text") %>% - fromJSON(flatten=TRUE) %>% - filter(deleted!=TRUE) + reference_data <- GET( + paste0( + url, + "api/reference-data", + "?access_token=", + get_access_token( + url = url, + username = username, + password = password + ) + ) + ) %>% + content(as = "text") %>% + fromJSON(flatten = TRUE) %>% + filter(.data$deleted != TRUE) return(reference_data) - } diff --git a/man/get_reference_data.Rd b/man/get_reference_data.Rd index 2b42b35..b5a5832 100644 --- a/man/get_reference_data.Rd +++ b/man/get_reference_data.Rd @@ -7,7 +7,8 @@ get_reference_data(url = url, username = username, password = password) } \arguments{ -\item{url}{Insert the base URL for your instance of Go.Data here. Don't forget the forward slash "/" at end!} +\item{url}{Insert the base URL for your instance of Go.Data here. Don't +forget the forward slash "/" at end!} \item{username}{The email address for your Go.Data login.} @@ -29,8 +30,10 @@ url <- "https://MyGoDataServer.com/" username <- "myemail@email.com" password <- "mypassword" -reference_data <- get_reference_data(url=url, - username=username, - password=password) +reference_data <- get_reference_data( + url = url, + username = username, + password = password +) } } From d203ca81f5372ea71a06dedf702ce3b46908c4e8 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Tue, 14 Feb 2023 11:33:59 +0000 Subject: [PATCH 134/203] removed recursive default arguments in get_reference_data --- R/get_reference_data.R | 6 +++--- man/get_reference_data.Rd | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/R/get_reference_data.R b/R/get_reference_data.R index b56b288..9b523a8 100644 --- a/R/get_reference_data.R +++ b/R/get_reference_data.R @@ -33,9 +33,9 @@ #' @importFrom jsonlite fromJSON #' @importFrom purrr pluck #' @export -get_reference_data <- function(url = url, - username = username, - password = password) { +get_reference_data <- function(url, + username, + password) { reference_data <- GET( paste0( diff --git a/man/get_reference_data.Rd b/man/get_reference_data.Rd index b5a5832..e21b311 100644 --- a/man/get_reference_data.Rd +++ b/man/get_reference_data.Rd @@ -4,7 +4,7 @@ \alias{get_reference_data} \title{Get reference data from Go.Data} \usage{ -get_reference_data(url = url, username = username, password = password) +get_reference_data(url, username, password) } \arguments{ \item{url}{Insert the base URL for your instance of Go.Data here. Don't From 54f00e54f8d590161e9290c305cfffe5c5bd80dc Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Tue, 14 Feb 2023 11:36:54 +0000 Subject: [PATCH 135/203] removed pipe from get_reference_data --- R/get_reference_data.R | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/R/get_reference_data.R b/R/get_reference_data.R index 9b523a8..7714ce6 100644 --- a/R/get_reference_data.R +++ b/R/get_reference_data.R @@ -37,7 +37,7 @@ get_reference_data <- function(url, username, password) { - reference_data <- GET( + reference_data_request <- GET( paste0( url, "api/reference-data", @@ -48,10 +48,10 @@ get_reference_data <- function(url, password = password ) ) - ) %>% - content(as = "text") %>% - fromJSON(flatten = TRUE) %>% - filter(.data$deleted != TRUE) + ) + reference_data_content <- content(reference_data_request, as = "text") + reference_data <- fromJSON(reference_data_content, flatten = TRUE) + reference_data <- filter(reference_data, .data$deleted != TRUE) return(reference_data) } From e4d0627c98d0950eaa77ab807fff45fdbbe25836 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Tue, 14 Feb 2023 11:38:04 +0000 Subject: [PATCH 136/203] used explicit namespace instead of import for get_reference_data --- R/get_reference_data.R | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/R/get_reference_data.R b/R/get_reference_data.R index 7714ce6..1ad0afe 100644 --- a/R/get_reference_data.R +++ b/R/get_reference_data.R @@ -26,18 +26,12 @@ #' password = password #' ) #' } -#' @importFrom magrittr %>% -#' @import dplyr -#' @import tidyr -#' @import httr -#' @importFrom jsonlite fromJSON -#' @importFrom purrr pluck #' @export get_reference_data <- function(url, username, password) { - reference_data_request <- GET( + reference_data_request <- httr::GET( paste0( url, "api/reference-data", @@ -49,9 +43,9 @@ get_reference_data <- function(url, ) ) ) - reference_data_content <- content(reference_data_request, as = "text") - reference_data <- fromJSON(reference_data_content, flatten = TRUE) - reference_data <- filter(reference_data, .data$deleted != TRUE) + reference_data_content <- httr::content(reference_data_request, as = "text") + reference_data <- jsonlite::fromJSON(reference_data_content, flatten = TRUE) + reference_data <- dplyr::filter(reference_data, .data$deleted != TRUE) return(reference_data) } From cc3dbe671399b243cb170ad841809d37b031b418 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Tue, 14 Feb 2023 11:39:10 +0000 Subject: [PATCH 137/203] ensure get_reference_data returns a tibble --- R/get_reference_data.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/get_reference_data.R b/R/get_reference_data.R index 1ad0afe..bfaf798 100644 --- a/R/get_reference_data.R +++ b/R/get_reference_data.R @@ -46,6 +46,7 @@ get_reference_data <- function(url, reference_data_content <- httr::content(reference_data_request, as = "text") reference_data <- jsonlite::fromJSON(reference_data_content, flatten = TRUE) reference_data <- dplyr::filter(reference_data, .data$deleted != TRUE) + reference_data <- tibble::as_tibble(reference_data) return(reference_data) } From 175c6cb880912de3992693e8c2e284a20d499852 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Tue, 14 Feb 2023 11:44:04 +0000 Subject: [PATCH 138/203] added test for get_reference_data --- tests/testthat/test-get_reference_data.R | 25 ++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 tests/testthat/test-get_reference_data.R diff --git a/tests/testthat/test-get_reference_data.R b/tests/testthat/test-get_reference_data.R new file mode 100644 index 0000000..864014b --- /dev/null +++ b/tests/testthat/test-get_reference_data.R @@ -0,0 +1,25 @@ +test_that("get_reference_data works as expected", { + skip("get_reference_data requires API call") + + res <- get_reference_data(url = url, username = username, password = password) + + expect_s3_class(res, "tbl_df") + expect_s3_class(res, "data.frame") + expect_identical(dim(res), c(391L, 19L)) + expect_true( + all(c( + "categoryId", "value", "description", "readOnly", "active", + "isDefaultReferenceData", "id", "createdAt", "createdBy", "updatedAt", + "updatedBy", "createdOn", "deleted", "colorCode", "order", + "isOutbreakTemplateReferenceData", "deletedAt", "iconId", "code" + ) %in% colnames(res)) + ) + + + expect_identical( + unname(sapply(res[1, ], class)), + c("character", "character", "character", "logical", "logical", "logical", + "character", "character", "character", "character", "character", + "character", "logical", "character", "integer", "logical", "logical", + "character", "character")) +}) From 36cb8df8b04fa7bbcc2e5119ff43b7a9c3a8fd26 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Tue, 14 Feb 2023 14:56:34 +0000 Subject: [PATCH 139/203] linting tests --- R/get_reference_data.R | 2 +- tests/testthat/test-batch_downloader.R | 14 +++++++------- tests/testthat/test-get_access_token.R | 1 - tests/testthat/test-get_all_outbreaks.R | 2 +- tests/testthat/test-get_godata_version.R | 2 +- tests/testthat/test-get_language_tokens.R | 2 +- 6 files changed, 11 insertions(+), 12 deletions(-) diff --git a/R/get_reference_data.R b/R/get_reference_data.R index bfaf798..4f78ee6 100644 --- a/R/get_reference_data.R +++ b/R/get_reference_data.R @@ -48,5 +48,5 @@ get_reference_data <- function(url, reference_data <- dplyr::filter(reference_data, .data$deleted != TRUE) reference_data <- tibble::as_tibble(reference_data) - return(reference_data) + return(reference_data) } diff --git a/tests/testthat/test-batch_downloader.R b/tests/testthat/test-batch_downloader.R index e7da9d4..0d5168e 100644 --- a/tests/testthat/test-batch_downloader.R +++ b/tests/testthat/test-batch_downloader.R @@ -30,7 +30,7 @@ test_that("batch_downloader works as expected", { "dateBecomeCase", "wasCase", "active", "followUpHistory", "isDateOfOnsetApproximate", "outcomeId", "riskReason", "pregnancyStatus", "dateOfOutcome", "dateOfInfection", "middleName", - "questionnaireAnswers.would_you_like_to_complete_who_basic_case_questionnaire", + "questionnaireAnswers.would_you_like_to_complete_who_basic_case_questionnaire", # nolint "questionnaireAnswers.Case_WhichForm", "questionnaireAnswers.report_test_reason", "questionnaireAnswers.Comcond_present", @@ -38,7 +38,7 @@ test_that("batch_downloader works as expected", { "questionnaireAnswers.expo_travel", "questionnaireAnswers.expo_visit_healthcare", "questionnaireAnswers.patcourse", "questionnaireAnswers.Comcond_select", - "questionnaireAnswers.specify_places_and_dates_for_up_to_3_locations_below", + "questionnaireAnswers.specify_places_and_dates_for_up_to_3_locations_below", # nolint "questionnaireAnswers.expo_travel_country1", "questionnaireAnswers.expo_travel_city1", "questionnaireAnswers.expo_travel_date1", @@ -107,14 +107,14 @@ test_that("batch_downloader works as expected", { "questionnaireAnswers.FA1_preexistingconditions_diabetes", "questionnaireAnswers.FA1_preexistingconditions_HIVotherimmunedeficiency", "questionnaireAnswers.FA1_preexistingconditions_heartdisease", - "questionnaireAnswers.FA1_preexistingconditions_asthmarequiringmedication", - "questionnaireAnswers.FA1_preexistingconditions_chroniclungdiseasenonasthma", + "questionnaireAnswers.FA1_preexistingconditions_asthmarequiringmedication", # nolint + "questionnaireAnswers.FA1_preexistingconditions_chroniclungdiseasenonasthma", # nolint "questionnaireAnswers.FA1_preexistingconditions_chronicliverdisease", - "questionnaireAnswers.FA1_preexistingconditions_chronichaematologicaldisorder", + "questionnaireAnswers.FA1_preexistingconditions_chronichaematologicaldisorder", # nolint "questionnaireAnswers.FA1_preexistingconditions_chronickidneydisease", "questionnaireAnswers.FA1_preexistingconditions_chronicneurological", - "questionnaireAnswers.FA1_preexistingconditions_organorbonemarrowrecipient", - "questionnaireAnswers.FA1_preexistingconditions_otherpreexistingcondition", + "questionnaireAnswers.FA1_preexistingconditions_organorbonemarrowrecipient", # nolint + "questionnaireAnswers.FA1_preexistingconditions_otherpreexistingcondition", # nolint "questionnaireAnswers.FA1_healthcareinteractions_contactemergencynumber", "questionnaireAnswers.FA1_priorXdayexposure_travelleddomestically", "questionnaireAnswers.FA1_priorXdayexposure_travelledinternationally", diff --git a/tests/testthat/test-get_access_token.R b/tests/testthat/test-get_access_token.R index d342755..a3c8943 100644 --- a/tests/testthat/test-get_access_token.R +++ b/tests/testthat/test-get_access_token.R @@ -15,4 +15,3 @@ test_that("get_access_token works as expected", { # character string cannot contain spaces expect_false(grepl(pattern = "\\s", x = res)) }) - diff --git a/tests/testthat/test-get_all_outbreaks.R b/tests/testthat/test-get_all_outbreaks.R index af05858..06f40f9 100644 --- a/tests/testthat/test-get_all_outbreaks.R +++ b/tests/testthat/test-get_all_outbreaks.R @@ -12,7 +12,7 @@ test_that("get_all_outbreaks works as expected", { expect_identical(dim(res), c(10L, 5L)) expect_identical( colnames(res), - c("id", "name", "description", "createdBy", "createdAt" ) + c("id", "name", "description", "createdBy", "createdAt") ) expect_identical( unname(sapply(res[1, ], class)), diff --git a/tests/testthat/test-get_godata_version.R b/tests/testthat/test-get_godata_version.R index 6a8e862..cc90c58 100644 --- a/tests/testthat/test-get_godata_version.R +++ b/tests/testthat/test-get_godata_version.R @@ -5,7 +5,7 @@ test_that("get_godata_versions works as expected", { expect_type(res, "character") # character string can contain digits - expect_true(grepl(pattern = "\\d",x = res)) + expect_true(grepl(pattern = "\\d", x = res)) # character string can contain full stops expect_true(grepl(pattern = ".", x = res)) # character string cannot contain alphabetic characters diff --git a/tests/testthat/test-get_language_tokens.R b/tests/testthat/test-get_language_tokens.R index 23e975d..4985c6e 100644 --- a/tests/testthat/test-get_language_tokens.R +++ b/tests/testthat/test-get_language_tokens.R @@ -19,6 +19,6 @@ test_that("get_language_tokens works as expected", { unname(sapply(res[1, ], class)), c("character", "character", "data.frame") ) - expect_identical(colnames(res$tokens),c("token", "translation")) + expect_identical(colnames(res$tokens), c("token", "translation")) expect_identical(dim(res$tokens), c(13641L, 2L)) }) From 20141048e47919a55ffb3b68cf01c3412448379d Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Tue, 14 Feb 2023 15:03:18 +0000 Subject: [PATCH 140/203] linted get_users --- R/get_users.R | 45 ++++++++++++++++++++++++++++----------------- man/get_users.Rd | 15 ++++++++++----- 2 files changed, 38 insertions(+), 22 deletions(-) diff --git a/R/get_users.R b/R/get_users.R index b318be1..5490a01 100644 --- a/R/get_users.R +++ b/R/get_users.R @@ -6,12 +6,15 @@ #' endpoint). This function relies on the #' `\users` API endpoint. #' -#' @param url Insert the base URL for your instance of Go.Data here. Don't forget the forward slash "/" at end! +#' @param url Insert the base URL for your instance of Go.Data here. Don't +#' forget the forward slash "/" at end! #' @param username The email address for your Go.Data login. #' @param password The password for your Go.Data login #' #' @return -#' Returns a data frame. Some fields, such as addresses, hospitalization history, and questionnaire fields may require further unnesting. See \code{\link[tidyr]{nest}} for assitance with unnesting. +#' Returns a data frame. Some fields, such as addresses, hospitalization +#' history, and questionnaire fields may require further unnesting. See +#' \code{\link[tidyr]{nest}} for assitance with unnesting. #' @export #' @examples #' \dontrun{ @@ -19,9 +22,11 @@ #' username <- "myemail@email.com" #' password <- "mypassword" #' -#' users <- get_users(url=url, -#' username=username, -#' password=password) +#' users <- get_users( +#' url = url, +#' username = username, +#' password = password +#' ) #' } #' @importFrom magrittr %>% #' @import dplyr @@ -30,19 +35,25 @@ #' @importFrom jsonlite fromJSON #' @importFrom purrr pluck #' @export +get_users <- function(url = url, + username = username, + password = password) { + users <- GET( + paste0( + url, + "api/users", + "?access_token=", + godataR::get_access_token( + url = url, + username = username, + password = password + ) + ) + ) %>% + content(as = "text") %>% + fromJSON(flatten = TRUE) %>% + filter(.data$deleted != TRUE) - -get_users <- function(url=url, - username=username, - password=password) { - - users <- GET(paste0(url,"api/users", - "?access_token=",godataR::get_access_token(url=url, username=username, password=password))) %>% - content(as="text") %>% - fromJSON(flatten=TRUE) %>% - filter(deleted!=TRUE) %>% - #select(id, firstName, lastName, email, roleIds, lastLoginDate, institutionName, disregardGeographicRestrictions, activeOutbreakId, createdBy, createdAt) return(users) - } diff --git a/man/get_users.Rd b/man/get_users.Rd index 0710668..bb42eef 100644 --- a/man/get_users.Rd +++ b/man/get_users.Rd @@ -7,14 +7,17 @@ get_users(url = url, username = username, password = password) } \arguments{ -\item{url}{Insert the base URL for your instance of Go.Data here. Don't forget the forward slash "/" at end!} +\item{url}{Insert the base URL for your instance of Go.Data here. Don't +forget the forward slash "/" at end!} \item{username}{The email address for your Go.Data login.} \item{password}{The password for your Go.Data login} } \value{ -Returns a data frame. Some fields, such as addresses, hospitalization history, and questionnaire fields may require further unnesting. See \code{\link[tidyr]{nest}} for assitance with unnesting. +Returns a data frame. Some fields, such as addresses, hospitalization +history, and questionnaire fields may require further unnesting. See +\code{\link[tidyr]{nest}} for assitance with unnesting. } \description{ A function to retrieve a list of all user @@ -29,8 +32,10 @@ url <- "https://MyGoDataServer.com/" username <- "myemail@email.com" password <- "mypassword" -users <- get_users(url=url, - username=username, - password=password) +users <- get_users( + url = url, + username = username, + password = password +) } } From 4d0a0d027264eb843702afbd1b95c72dcda1aaec Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Tue, 14 Feb 2023 15:04:07 +0000 Subject: [PATCH 141/203] removed recursive argument defaults in get_users --- R/get_users.R | 6 +++--- man/get_users.Rd | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/R/get_users.R b/R/get_users.R index 5490a01..2fc7e77 100644 --- a/R/get_users.R +++ b/R/get_users.R @@ -35,9 +35,9 @@ #' @importFrom jsonlite fromJSON #' @importFrom purrr pluck #' @export -get_users <- function(url = url, - username = username, - password = password) { +get_users <- function(url, + username, + password) { users <- GET( paste0( diff --git a/man/get_users.Rd b/man/get_users.Rd index bb42eef..69f4cb8 100644 --- a/man/get_users.Rd +++ b/man/get_users.Rd @@ -4,7 +4,7 @@ \alias{get_users} \title{Get user data from Go.Data} \usage{ -get_users(url = url, username = username, password = password) +get_users(url, username, password) } \arguments{ \item{url}{Insert the base URL for your instance of Go.Data here. Don't From 2569d5b7b539ff4de3fd1e16581a030608377986 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Tue, 14 Feb 2023 15:05:58 +0000 Subject: [PATCH 142/203] removed pipe from get_users --- R/get_users.R | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/R/get_users.R b/R/get_users.R index 2fc7e77..f5696a7 100644 --- a/R/get_users.R +++ b/R/get_users.R @@ -39,7 +39,7 @@ get_users <- function(url, username, password) { - users <- GET( + users_request <- GET( paste0( url, "api/users", @@ -50,10 +50,10 @@ get_users <- function(url, password = password ) ) - ) %>% - content(as = "text") %>% - fromJSON(flatten = TRUE) %>% - filter(.data$deleted != TRUE) + ) + users_content <- content(users_request, as = "text") + users <- fromJSON(users_content, flatten = TRUE) + users <- filter(users, .data$deleted != TRUE) return(users) } From 3d042349306101ec19e8ed1db80ccaa17a666a0b Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Tue, 14 Feb 2023 15:06:58 +0000 Subject: [PATCH 143/203] used explicit namespace instead of import for get_users --- R/get_users.R | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/R/get_users.R b/R/get_users.R index f5696a7..9fc311b 100644 --- a/R/get_users.R +++ b/R/get_users.R @@ -28,18 +28,12 @@ #' password = password #' ) #' } -#' @importFrom magrittr %>% -#' @import dplyr -#' @import tidyr -#' @import httr -#' @importFrom jsonlite fromJSON -#' @importFrom purrr pluck #' @export get_users <- function(url, username, password) { - users_request <- GET( + users_request <- httr::GET( paste0( url, "api/users", @@ -51,9 +45,9 @@ get_users <- function(url, ) ) ) - users_content <- content(users_request, as = "text") - users <- fromJSON(users_content, flatten = TRUE) - users <- filter(users, .data$deleted != TRUE) + users_content <- httr::content(users_request, as = "text") + users <- jsonlite::fromJSON(users_content, flatten = TRUE) + users <- dplyr::filter(users, .data$deleted != TRUE) return(users) } From 39ab66d5dc88f3aabd0ae785f3682bfa74cec18d Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Tue, 14 Feb 2023 15:07:46 +0000 Subject: [PATCH 144/203] ensure get_users returns a tibble --- R/get_users.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/get_users.R b/R/get_users.R index 9fc311b..8f1765b 100644 --- a/R/get_users.R +++ b/R/get_users.R @@ -48,6 +48,7 @@ get_users <- function(url, users_content <- httr::content(users_request, as = "text") users <- jsonlite::fromJSON(users_content, flatten = TRUE) users <- dplyr::filter(users, .data$deleted != TRUE) + users <- tibble::as_tibble(users) return(users) } From f6ecc5e684d12ded45a933f316cd67540eba5f96 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Tue, 14 Feb 2023 15:15:41 +0000 Subject: [PATCH 145/203] added test for get_users --- tests/testthat/test-get_users.R | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 tests/testthat/test-get_users.R diff --git a/tests/testthat/test-get_users.R b/tests/testthat/test-get_users.R new file mode 100644 index 0000000..d3d69ac --- /dev/null +++ b/tests/testthat/test-get_users.R @@ -0,0 +1,30 @@ +test_that("get_users works as expected", { + skip("get_users requires API call") + + res <- get_users(url = url, username = username, password = password) + + expect_s3_class(res, "tbl_df") + expect_s3_class(res, "data.frame") + expect_identical(dim(res), c(20L, 67L)) + expect_true( + all(c( + "id", "firstName", "lastName", "roleIds", "activeOutbreakId", + "languageId", "passwordChange", "loginRetriesCount", "lastLoginDate", + "disregardGeographicRestrictions", "dontCacheFilters", "email", + "createdAt", "createdBy", "updatedAt", "updatedBy", "deleted", + "createdOn", "securityQuestions", "outbreakIds", "institutionName" + ) %in% colnames(res)) + ) + + expect_true( + all(grepl(pattern = "^settings", x = colnames(res)[22:66])) + ) + + expect_identical( + unname(sapply(res[1, ], class)), + c("character", "character", "character", "list", "character", "character", + "logical", "integer", "character", "logical", "logical", "character", + "character", "character", "character", "character", "logical", + "character", "list", "list", "character", rep("list", 12), "logical", + rep("list", 29), "logical", "list", "list", "character")) +}) From 6c0cd734422681dcffe901c4ba3087b5c6f3a828 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Tue, 14 Feb 2023 15:56:35 +0000 Subject: [PATCH 146/203] linted mongify_date --- R/mongify_date.R | 25 +++++++++++++------------ man/mongify_date.Rd | 3 ++- 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/R/mongify_date.R b/R/mongify_date.R index 7006a9d..b4d6cda 100644 --- a/R/mongify_date.R +++ b/R/mongify_date.R @@ -33,7 +33,8 @@ #' must be in the same format). #' #' Dates with and without a time-stamp are both accepted. Dates which do include -#' a time stamp must include hours, minutes and seconds in the format **HH:mm:ss** +#' a time stamp must include hours, minutes and seconds in the format +#' **HH:mm:ss** #' #' @md #' @@ -57,32 +58,32 @@ #' #' @export mongify_date <- function(dates, - dateformat = "undefined"){ + dateformat = "undefined") { # Define format of input dates: - if(dateformat == "undefined"){ - format2search = c("ymd_HMS", "dmy_HMS", "mdy_HMS")} - if(dateformat == "ymd"){format2search = "ymd_HMS"} - if(dateformat == "dmy"){format2search = "dmy_HMS"} - if(dateformat == "mdy"){format2search = "mdy_HMS"} + if (dateformat == "undefined") { + format2search <- c("ymd_HMS", "dmy_HMS", "mdy_HMS") + } + if (dateformat == "ymd") format2search <- "ymd_HMS" + if (dateformat == "dmy") format2search <- "dmy_HMS" + if (dateformat == "mdy") format2search <- "mdy_HMS" # Make sure dates are strings: - dates = as.character(dates) + dates <- as.character(dates) # Check if dates already have a time-stamp, if not add time: - dflong = ifelse(nchar(dates) %in% c(8, 10), + dflong <- ifelse(nchar(dates) %in% c(8, 10), paste0(dates, " 00:00:00"), dates) # Convert date-times to posixct format: - dfpct = lubridate::parse_date_time(x = dflong, + dfpct <- lubridate::parse_date_time(x = dflong, orders = format2search, tz = "UTC") # Add the Godata / Mongodb specific format ending for date-time: - dfmongo = format(x = dfpct, format = "%Y-%m-%dT%H:%M:%S.000Z") + dfmongo <- format(x = dfpct, format = "%Y-%m-%dT%H:%M:%S.000Z") # Return the formatted dates: return(dfmongo) - } diff --git a/man/mongify_date.Rd b/man/mongify_date.Rd index 59bde2d..26f79bf 100644 --- a/man/mongify_date.Rd +++ b/man/mongify_date.Rd @@ -49,7 +49,8 @@ element order can be selected (i.e. all dates in the column to be converted must be in the same format). Dates with and without a time-stamp are both accepted. Dates which do include -a time stamp must include hours, minutes and seconds in the format \strong{HH:mm:ss} +a time stamp must include hours, minutes and seconds in the format +\strong{HH:mm:ss} } \examples{ # Create dummy dataframe with dates to convert: From 483a81c5092aaff68ffb234a9b49c6e23ec4bce4 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Tue, 14 Feb 2023 16:21:42 +0000 Subject: [PATCH 147/203] added tests for mongify_date and linted mongify_date --- R/mongify_date.R | 16 +++++--- tests/testthat/test-mongify_date.R | 66 ++++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+), 6 deletions(-) create mode 100644 tests/testthat/test-mongify_date.R diff --git a/R/mongify_date.R b/R/mongify_date.R index b4d6cda..90e07cf 100644 --- a/R/mongify_date.R +++ b/R/mongify_date.R @@ -72,14 +72,18 @@ mongify_date <- function(dates, dates <- as.character(dates) # Check if dates already have a time-stamp, if not add time: - dflong <- ifelse(nchar(dates) %in% c(8, 10), - paste0(dates, " 00:00:00"), - dates) + dflong <- ifelse( + nchar(dates) %in% c(8, 10), + paste0(dates, " 00:00:00"), + dates + ) # Convert date-times to posixct format: - dfpct <- lubridate::parse_date_time(x = dflong, - orders = format2search, - tz = "UTC") + dfpct <- lubridate::parse_date_time( + x = dflong, + orders = format2search, + tz = "UTC" + ) # Add the Godata / Mongodb specific format ending for date-time: dfmongo <- format(x = dfpct, format = "%Y-%m-%dT%H:%M:%S.000Z") diff --git a/tests/testthat/test-mongify_date.R b/tests/testthat/test-mongify_date.R new file mode 100644 index 0000000..35b7ca4 --- /dev/null +++ b/tests/testthat/test-mongify_date.R @@ -0,0 +1,66 @@ +test_that("mongify_date works as expected", { + + dates <- c("08/01/2022", "08/31/2022") + converted_dates <- mongify_date(dates = dates, dateformat = "mdy") + + expect_type(converted_dates, "character") + expect_length(converted_dates, length(dates)) + expect_true( + all(grepl( + pattern = "^\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}.\\d{3}Z", + x = converted_dates + )) + ) +}) + +test_that("mongify_date works as expected with undefined format", { + + dates <- c("08/01/2022", "08/31/2022") + converted_dates <- mongify_date(dates = dates, dateformat = "undefined") + + expect_type(converted_dates, "character") + expect_length(converted_dates, length(dates)) + expect_true( + all(grepl( + pattern = "^\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}.\\d{3}Z", + x = converted_dates + )) + ) +}) + +test_that("mongify_date works as expected with ymd format", { + + dates <- c("2022/08/01", "2022/08/31") + converted_dates <- mongify_date(dates = dates, dateformat = "ymd") + + expect_type(converted_dates, "character") + expect_length(converted_dates, length(dates)) + expect_true( + all(grepl( + pattern = "^\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}.\\d{3}Z", + x = converted_dates + )) + ) +}) + +test_that("mongify_date throws warning with incorrect format", { + + # second date cannot be dmy format + dates <- c("08/01/2022", "08/31/2022") + expect_warning( + converted_dates <- mongify_date(dates = dates, dateformat = "dmy"), + regexp = "1 failed to parse." + ) + + expect_type(converted_dates, "character") + expect_true(anyNA(converted_dates)) + expect_length(converted_dates, length(dates)) + # conforms to expected format or is NA + expect_true( + all(grepl( + pattern = "^\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}.\\d{3}Z", + x = converted_dates + ) | is.na(converted_dates) + ) + ) +}) From 2fabad6b6b956c82be216a1c12adb94c3a774539 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Tue, 14 Feb 2023 16:28:07 +0000 Subject: [PATCH 148/203] linted get_date_range --- R/get_date_range.R | 71 +++++++++++++++++++++++----------------------- 1 file changed, 35 insertions(+), 36 deletions(-) diff --git a/R/get_date_range.R b/R/get_date_range.R index de11e1b..3597c38 100644 --- a/R/get_date_range.R +++ b/R/get_date_range.R @@ -1,59 +1,58 @@ -#' Get minimum and maximum date for a vector of dates -#' +#' Get minimum and maximum date for a vector of dates +#' #' @author Amy Mikhail, \email{amy.mikhail@@gmail.com} -#' -#' @description -#' This function will attempt to auto-detect the date format using lubridate, -#' then determine the minimum and maximum for the submitted vector of dates. +#' +#' @description +#' This function will attempt to auto-detect the date format using lubridate, +#' then determine the minimum and maximum for the submitted vector of dates. #' It is intended as a helper function for selecting date ranges and adding #' them to queries. -#' +#' #' @md -#' +#' #' @param dates character vector of dates to extract the date range from -#' -#' @return -#' Returns a list object with two values (minimum and maximum date) -#' +#' +#' @return +#' Returns a list object with two values (minimum and maximum date) +#' #' @import lubridate -#' -#' @examples +#' +#' @examples #' # Create a character vector of dates: #' x <- c("2022-07-15", "2021-08-09", NA, "2022-08-03") -#' +#' #' # Get date range: #' daterange <- get_date_range(dates = x) -#' +#' #' # View the result: #' daterange #' @export -get_date_range <- function(dates){ - +get_date_range <- function(dates) { + # Check if the input variable is already in date format: - if(any(class(dates) %in% c("Date", "POSIXt"))){ - + if (any(class(dates) %in% c("Date", "POSIXt"))) { + # If yes, no need to format: - dates2range = dates - + dates2range <- dates + } else { - + # If no, convert to date format with lubridate: - dates2range = lubridate::parse_date_time(x = dates, - orders = c("ymd", - "dmy", - "mdy")) + dates2range <- lubridate::parse_date_time( + x = dates, + orders = c("ymd", "dmy", "mdy") + ) } - + # Get the minimum date: - mindate = min(dates2range, na.rm = TRUE) - + mindate <- min(dates2range, na.rm = TRUE) + # Get the maximum date: - maxdate = max(dates2range, na.rm = TRUE) - + maxdate <- max(dates2range, na.rm = TRUE) + # Compile results: - daterange = list(mindate = mindate, maxdate = maxdate) - + daterange <- list(mindate = mindate, maxdate = maxdate) + # Return the results: return(daterange) - -} \ No newline at end of file +} From f07b89e6a22fe685bba8da79ad6702c985519ac9 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Tue, 14 Feb 2023 16:58:38 +0000 Subject: [PATCH 149/203] added tests for get_date_range --- tests/testthat/test-get_date_range.R | 77 ++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) create mode 100644 tests/testthat/test-get_date_range.R diff --git a/tests/testthat/test-get_date_range.R b/tests/testthat/test-get_date_range.R new file mode 100644 index 0000000..41593d9 --- /dev/null +++ b/tests/testthat/test-get_date_range.R @@ -0,0 +1,77 @@ +test_that("get_date_range works as expected", { + x <- c("2020-07-15", "2021-08-09", "2022-08-03") + date_range <- get_date_range(dates = x) + + expect_type(date_range, "list") + expect_named(date_range, c("mindate", "maxdate")) + expect_identical( + date_range, + list( + mindate = as.POSIXct("2020-07-15", tz = "UTC"), + maxdate = as.POSIXct("2022-08-03", tz = "UTC") + ) + ) +}) + +test_that("get_date_range works as expected with missing values", { + x <- c("2020-07-15", NA, "2021-08-09", "2022-08-03", NA) + date_range <- get_date_range(dates = x) + + expect_type(date_range, "list") + expect_named(date_range, c("mindate", "maxdate")) + expect_identical( + date_range, + list( + mindate = as.POSIXct("2020-07-15", tz = "UTC"), + maxdate = as.POSIXct("2022-08-03", tz = "UTC") + ) + ) +}) + +test_that("get_date_range works as expected with different formats", { + # first 3 are ymd, last is dmy + x <- c("2020-07-15", "2021-08-09", "2022-08-03", "13-04-2023") + date_range <- get_date_range(dates = x) + + expect_type(date_range, "list") + expect_named(date_range, c("mindate", "maxdate")) + expect_identical( + date_range, + list( + mindate = as.POSIXct("2020-07-15", tz = "UTC"), + maxdate = as.POSIXct("2023-04-13", tz = "UTC") + ) + ) +}) + +test_that("get_date_range cannot handle ambiguous mixed formats", { + # last two are dmy and mdy, so max date should be the last one + x <- c("2020-07-15", "2021-08-09", "2022-08-03", "01-04-2023", "05-01-2023") + date_range <- get_date_range(dates = x) + + expect_type(date_range, "list") + expect_named(date_range, c("mindate", "maxdate")) + expect_identical( + date_range, + list( + mindate = as.POSIXct("2020-07-15", tz = "UTC"), + maxdate = as.POSIXct("2023-04-01", tz = "UTC") + ) + ) +}) + +test_that("get_date_range throws warning with incorrect dates", { + x <- c("2020-07-15", "2021-08-09", "2022-08-03", "2023-13-01") + expect_warning(date_range <- get_date_range(dates = x)) + + expect_type(date_range, "list") + expect_named(date_range, c("mindate", "maxdate")) + expect_identical( + date_range, + list( + mindate = as.POSIXct("2020-07-15", tz = "UTC"), + maxdate = as.POSIXct("2022-08-03", tz = "UTC") + ) + ) +}) + From 3b9277e0ad1d9678594d0f4e0a51e613a699ebde Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Thu, 16 Feb 2023 11:45:43 +0000 Subject: [PATCH 150/203] added clean_locations function, documentation and a test --- R/clean_locations.R | 125 ++++++++++++++++++++++++++ man/clean_locations.Rd | 34 +++++++ tests/testthat/test-clean_locations.R | 35 ++++++++ 3 files changed, 194 insertions(+) create mode 100644 R/clean_locations.R create mode 100644 man/clean_locations.Rd create mode 100644 tests/testthat/test-clean_locations.R diff --git a/R/clean_locations.R b/R/clean_locations.R new file mode 100644 index 0000000..b008684 --- /dev/null +++ b/R/clean_locations.R @@ -0,0 +1,125 @@ +#' Cleans location data +#' +#' @description Rearrange via joins to get into more usable hierarchy format, +#' these can then be joined to cases, contacts, etc for further analysis +#' +#' @param locations A [`tibble`] containing locations data. This is the data +#' returned from [`get_locations()`] +#' +#' @return A `tibble` containing the cleaned and rearranged location data. +#' @export +#' +#' @examples +#' \dontrun{ +#' url <- "https://MyGoDataServer.com/" +#' username <- "myemail@email.com" +#' password <- "mypassword" +#' +#' locations <- get_locations( +#' url = url, +#' username = username, +#' password = password +#' ) +#' +#' clean_locations(locations = locations) +#' } +clean_locations <- function(locations) { + + # filter out delete and inactive (or NA) values + clean_locations <- dplyr::filter( + locations, + .data$deleted == FALSE | is.na(.data$deleted) + ) + clean_locations <- dplyr::filter( + clean_locations, + .data$active == TRUE | is.na(.data$active) + ) + + # add admin-level column + clean_locations <- dplyr::mutate( + .data = clean_locations, + admin_level = sub(".*LEVEL_", "", .data$geographicalLevelId) + ) + + # select columns + clean_locations <- dplyr::select( + .data = clean_locations, + location_id = "id", + "admin_level", + "name", + parent_location_id = "parentLocationId", + lat = "geoLocation.lat", + long = "geoLocation.lng" + ) + + clean_locations <- dplyr::filter( + .data = clean_locations, + !is.na(.data$admin_level) + ) + + # split locations data frames into separate data frames by admin level + locations_split <- dplyr::group_by( + .data = clean_locations, + .data$admin_level + ) + locations_split <- dplyr::group_split(locations_split) + + # rename columns by appending admin level + locations_split <- purrr::imap(.x = locations_split, .f = function(x, idx) { + colnames(x) <- paste("admin", idx - 1, colnames(x), sep = "_") + x + }) + + # add location_id and parent_location_id columns + locations_split <- purrr::imap(.x = locations_split, .f = function(x, idx) { + x$location_id <- dplyr::pull( + x, paste("admin", idx - 1, "location_id", sep = "_") + ) + x + }) + locations_split <- purrr::imap(.x = locations_split, .f = function(x, idx) { + if (idx - 1 != 0) { + x$parent_location_id <- dplyr::pull( + x, paste("admin", idx - 1, "parent_location_id", sep = "_") + ) + } + x + }) + + # loop over list of admin specific data frames and join them + for (i in seq(from = length(locations_split), to = 2L)) { + + for (x in 1:(i - 1)) { + + join_index <- i - x + + locations_split[[i]] <- dplyr::left_join( + x = locations_split[[i]], + y = locations_split[[join_index]], + by = c("parent_location_id" = "location_id") + ) + + # first table (admin level 0) does not contain parent_location_id so skip + if (join_index != 1) { + # use parent_location_id from right table + locations_split[[i]]$parent_location_id <- + locations_split[[i]]$parent_location_id.y + # remove extra parent_location_id column + locations_split[[i]]$parent_location_id.y <- NULL + } + } + locations_split[[i]]$parent_location_id <- NULL + } + + # bind the admin level tables by row + full <- do.call(dplyr::bind_rows, locations_split) + + # join cleaned location with new table + clean_locations <- left_join( + x = clean_locations, + y = full, + by = "location_id" + ) + + return(locations_clean) +} diff --git a/man/clean_locations.Rd b/man/clean_locations.Rd new file mode 100644 index 0000000..0e2578b --- /dev/null +++ b/man/clean_locations.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/clean_locations.R +\name{clean_locations} +\alias{clean_locations} +\title{Cleans location data} +\usage{ +clean_locations(locations) +} +\arguments{ +\item{locations}{A \code{\link{tibble}} containing locations data. This is the data +returned from \code{\link[=get_locations]{get_locations()}}} +} +\value{ +A \code{tibble} containing the cleaned and rearranged location data. +} +\description{ +Rearrange via joins to get into more usable hierarchy format, +these can then be joined to cases, contacts, etc for further analysis +} +\examples{ +\dontrun{ +url <- "https://MyGoDataServer.com/" +username <- "myemail@email.com" +password <- "mypassword" + +locations <- get_locations( + url = url, + username = username, + password = password +) + +clean_locations(locations = locations) +} +} diff --git a/tests/testthat/test-clean_locations.R b/tests/testthat/test-clean_locations.R new file mode 100644 index 0000000..9e6648c --- /dev/null +++ b/tests/testthat/test-clean_locations.R @@ -0,0 +1,35 @@ +test_that("clean_locations works as expected", { + skip("get_locations requires API call") + + res <- get_locations( + url = url, + username = username, + password = password + ) + res <- clean_locations(locations = res) + + expect_s3_class(res, "tbl_df") + expect_s3_class(res, "data.frame") + expect_identical(dim(res), c(11L, 24L)) + expect_true( + all(c( + "location_id", "admin_level", "name", "parent_location_id", "lat", + "long", "admin_0_location_id", "admin_0_admin_level", "admin_0_name", + "admin_0_parent_location_id", "admin_0_lat", "admin_0_long", + "admin_1_location_id", "admin_1_admin_level", "admin_1_name", + "admin_1_parent_location_id", "admin_1_lat", "admin_1_long", + "admin_2_location_id", "admin_2_admin_level", "admin_2_name", + "admin_2_parent_location_id", "admin_2_lat", "admin_2_long" + ) %in% colnames(res)) + ) + + expect_identical( + unname(sapply(res[1, ], class)), + c( + "character", "character", "character", "character", "numeric", "numeric", + "character", "character", "character", "character", "numeric", "numeric", + "character", "character", "character", "character", "numeric", "numeric", + "character", "character", "character", "character", "numeric", "numeric" + ) + ) +}) From 05501773f15ca5c18d8792bc990f0850b5164196 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Thu, 16 Feb 2023 12:08:34 +0000 Subject: [PATCH 151/203] added clean_case_address_history function, documentation and test --- R/clean_case_address_history.R | 98 +++++++++++++++++++ man/clean_case_address_history.Rd | 37 +++++++ .../test-clean_case_address_history.R | 31 ++++++ 3 files changed, 166 insertions(+) create mode 100644 R/clean_case_address_history.R create mode 100644 man/clean_case_address_history.Rd create mode 100644 tests/testthat/test-clean_case_address_history.R diff --git a/R/clean_case_address_history.R b/R/clean_case_address_history.R new file mode 100644 index 0000000..815d3fd --- /dev/null +++ b/R/clean_case_address_history.R @@ -0,0 +1,98 @@ +#' Extract address information from case data +#' +#' @description This function un-nests and cleans the address data and stores +#' it in a standalone table with all addresses, even if there is more than 1 +#' per person. +#' +#' @param cases A tibble with case data. Case data is returned by +#' [`get_cases()`]. +#' +#' @return A tibble with address information from cases data. +#' @export +#' +#' @examples +#' \dontrun{ +#' url <- "https://MyGoDataServer.com/" +#' username <- "myemail@email.com" +#' password <- "mypassword" +#' outbreak_id <- "3b5554d7-2c19-41d0-b9af-475ad25a382b" +#' +#' cases <- get_cases( +#' url = url, +#' username = username, +#' password = password, +#' outbreak_id = outbreak_id +#' ) +#' +#' case_address_history <- clean_case_address_history(cases = cases) +#' } +clean_case_address_history <- function(cases) { + + cases_address_history_clean <- dplyr::filter( + .data = cases, + .data$deleted == FALSE | is.na(.data$deleted) + ) + + cases_address_history_clean <- dplyr::select( + .data = cases_address_history_clean, + "id", "visualId", "addresses" + ) + + cases_address_history_clean <- tidyr::unnest( + data = cases_address_history_clean, + cols = "addresses", + names_sep = "_") + + cases_address_history_clean <- dplyr::select_all( + .tbl = cases_address_history_clean, + .funs = ~gsub("\\.", "_", tolower(.)) + ) + + cases_address_history_clean <- dplyr::select_if( + .tbl = cases_address_history_clean, + purrr::negate(is.list) + ) + + cases_address_history_clean <- dplyr::mutate( + .data = cases_address_history_clean, + addresses_typeid = sub(".*TYPE_", "", .data$addresses_typeid) + ) + + cases_address_history_clean <- dplyr::left_join( + cases_address_history_clean, + locations_clean, + by = c("addresses_locationid" = "location_id") + ) + + # bring in GPS from locations in case blank from case record, otherwise use + # case + cases_address_history_clean <- dplyr::mutate( + .data = cases_address_history_clean, + lat = dplyr::case_when( + is.na(addresses_geolocation_lat) ~ lat, + TRUE ~ addresses_geolocation_lat + ), + long = dplyr::case_when( + is.na(addresses_geolocation_lng) ~ lat, + TRUE ~ addresses_geolocation_lng + ) + ) + + cases_address_history_clean <- dplyr::select( + .data = cases_address_history_clean, + "id", + "visualid", + "addresses_locationid", + "addresses_typeid", + "lat", + "long", + address = "addresses_addressline1", + postal_code = "addresses_postalcode", + city = "addresses_city", + telephone = "addresses_phonenumber", + email = "addresses_emailaddress", + dplyr::matches("^admin_.*name$") + ) + + return(cases_address_history_clean) +} diff --git a/man/clean_case_address_history.Rd b/man/clean_case_address_history.Rd new file mode 100644 index 0000000..cd9369f --- /dev/null +++ b/man/clean_case_address_history.Rd @@ -0,0 +1,37 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/clean_case_address_history.R +\name{clean_case_address_history} +\alias{clean_case_address_history} +\title{Extract address information from case data} +\usage{ +clean_case_address_history(cases) +} +\arguments{ +\item{cases}{A tibble with case data. Case data is returned by +\code{\link[=get_cases]{get_cases()}}.} +} +\value{ +A tibble with address information from cases data. +} +\description{ +This function un-nests and cleans the address data and stores +it in a standalone table with all addresses, even if there is more than 1 +per person. +} +\examples{ +\dontrun{ +url <- "https://MyGoDataServer.com/" +username <- "myemail@email.com" +password <- "mypassword" +outbreak_id <- "3b5554d7-2c19-41d0-b9af-475ad25a382b" + +cases <- get_cases( + url = url, + username = username, + password = password, + outbreak_id = outbreak_id +) + +case_address_history <- clean_case_address_history(cases = cases) +} +} diff --git a/tests/testthat/test-clean_case_address_history.R b/tests/testthat/test-clean_case_address_history.R new file mode 100644 index 0000000..a0c5467 --- /dev/null +++ b/tests/testthat/test-clean_case_address_history.R @@ -0,0 +1,31 @@ +test_that("clean_case_address_history works as expected", { + skip("get_cases requires API call") + + res <- get_cases( + url = url, + username = username, + password = password, + outbreak_id = outbreak_id + ) + res <- clean_case_address_history(cases = res) + + expect_s3_class(res, "tbl_df") + expect_s3_class(res, "data.frame") + expect_identical(dim(res), c(15L, 14L)) + expect_true( + all(c( + "id", "visualid", "addresses_locationid", "addresses_typeid", "lat", + "long", "address", "postal_code", "city", "telephone", "email", + "admin_0_name", "admin_1_name", "admin_2_name" + ) %in% colnames(res)) + ) + + expect_identical( + unname(sapply(res[1, ], class)), + c( + "character", "character", "character", "character", "numeric", "numeric", + "character", "character", "character", "character", "character", + "character", "character", "character" + ) + ) +}) From 7e375e68a415021900cf1b660906367f664c29e7 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Thu, 16 Feb 2023 13:50:04 +0000 Subject: [PATCH 152/203] added clean_case_med_history function, documentation and test --- R/clean_case_med_history.R | 79 ++++++++++++++++++++ man/clean_case_med_history.Rd | 36 +++++++++ tests/testthat/test-clean_case_med_history.R | 30 ++++++++ 3 files changed, 145 insertions(+) create mode 100644 R/clean_case_med_history.R create mode 100644 man/clean_case_med_history.Rd create mode 100644 tests/testthat/test-clean_case_med_history.R diff --git a/R/clean_case_med_history.R b/R/clean_case_med_history.R new file mode 100644 index 0000000..82c335e --- /dev/null +++ b/R/clean_case_med_history.R @@ -0,0 +1,79 @@ +#' Extracts and cleans medical history from case data +#' +#' @description This function un-nests and cleans date ranges of isolation and +#' hospitalization history and stores it in a standalone table. +#' +#' @param cases A tibble with case data. Case data is returned by +#' [`get_cases()`]. +#' +#' @return A tibble with information on isolation and hospitalization history. +#' @export +#' +#' @examples +#' \dontrun{ +#' url <- "https://MyGoDataServer.com/" +#' username <- "myemail@email.com" +#' password <- "mypassword" +#' outbreak_id <- "3b5554d7-2c19-41d0-b9af-475ad25a382b" +#' +#' cases <- get_cases( +#' url = url, +#' username = username, +#' password = password, +#' outbreak_id = outbreak_id +#' ) +#' +#' cases_med_history <- clean_case_med_history(cases = cases) +#' } +clean_case_med_history <- function(cases) { + + cases_dateranges_history_clean <- dplyr::filter( + .data = cases, + .data$deleted == FALSE | is.na(.data$deleted) + ) + + # cannot unnest on mix of data frames and lists so change empty lists to empty + # data frames + cases_dateranges_history_clean$dateRanges <- purrr::map( + cases_dateranges_history_clean$dateRanges, + .f = function(x) { + if (length(x) == 0) x <- data.frame() + x + } + ) + + cases_dateranges_history_clean <- tidyr::unnest( + data = cases_dateranges_history_clean, + cols = dateRanges, + names_sep = "_" + ) + + cases_dateranges_history_clean <- dplyr::select_at( + .tbl = cases_dateranges_history_clean, + dplyr::vars(id, visualId, starts_with("dateRanges")), + tolower + ) + + cases_dateranges_history_clean <- dplyr::mutate( + .data = cases_dateranges_history_clean, + dateranges_typeid = sub(".*TYPE_", "", dateranges_typeid) + ) + + cases_dateranges_history_clean <- dplyr::mutate( + .data = cases_dateranges_history_clean, + dateranges_centername = sub(".*NAME_", "", dateranges_centername) + ) + + cases_dateranges_history_clean <- dplyr::mutate_at( + .tbl = cases_dateranges_history_clean, + dplyr::vars(dateranges_startdate, dateranges_enddate), + as.Date + ) + + cases_dateranges_history_clean <- dplyr::select_if( + .tbl = cases_dateranges_history_clean, + purrr::negate(is.list) + ) + + return(cases_dateranges_history_clean) +} diff --git a/man/clean_case_med_history.Rd b/man/clean_case_med_history.Rd new file mode 100644 index 0000000..bf1fa66 --- /dev/null +++ b/man/clean_case_med_history.Rd @@ -0,0 +1,36 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/clean_case_med_history.R +\name{clean_case_med_history} +\alias{clean_case_med_history} +\title{Extracts and cleans medical history from case data} +\usage{ +clean_case_med_history(cases) +} +\arguments{ +\item{cases}{A tibble with case data. Case data is returned by +\code{\link[=get_cases]{get_cases()}}.} +} +\value{ +A tibble with information on isolation and hospitalization history. +} +\description{ +This function un-nests and cleans date ranges of isolation and +hospitalization history and stores it in a standalone table. +} +\examples{ +\dontrun{ +url <- "https://MyGoDataServer.com/" +username <- "myemail@email.com" +password <- "mypassword" +outbreak_id <- "3b5554d7-2c19-41d0-b9af-475ad25a382b" + +cases <- get_cases( + url = url, + username = username, + password = password, + outbreak_id = outbreak_id +) + +cases_med_history <- clean_case_med_history(cases = cases) +} +} diff --git a/tests/testthat/test-clean_case_med_history.R b/tests/testthat/test-clean_case_med_history.R new file mode 100644 index 0000000..895dcd0 --- /dev/null +++ b/tests/testthat/test-clean_case_med_history.R @@ -0,0 +1,30 @@ +test_that("clean_case_med_history works as expected", { + skip("get_cases requires API call") + + res <- get_cases( + url = url, + username = username, + password = password, + outbreak_id = outbreak_id + ) + res <- clean_case_med_history(cases = res) + + expect_s3_class(res, "tbl_df") + expect_s3_class(res, "data.frame") + expect_identical(dim(res), c(7L, 9L)) + expect_true( + all(c( + "id", "visualid", "dateranges_typeid", "dateranges_centername", + "dateranges_locationid", "dateranges_startdate", "dateranges_enddate", + "dateranges_comments", "dateranges_id" + ) %in% colnames(res)) + ) + + expect_identical( + unname(sapply(res[1, ], class)), + c( + "character", "character", "character", "character", "character", "Date", + "Date", "logical", "logical" + ) + ) +}) From ef57c7e5db11bf5ba2d0a3af947822257665aa56 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Thu, 16 Feb 2023 17:19:25 +0000 Subject: [PATCH 153/203] added clean_case_vax_history function, documentation and test --- R/clean_case_vax_history.R | 78 ++++++++++++++++++++ man/clean_case_vax_history.Rd | 35 +++++++++ tests/testthat/test-clean_case_vax_history.R | 26 +++++++ 3 files changed, 139 insertions(+) create mode 100644 R/clean_case_vax_history.R create mode 100644 man/clean_case_vax_history.Rd create mode 100644 tests/testthat/test-clean_case_vax_history.R diff --git a/R/clean_case_vax_history.R b/R/clean_case_vax_history.R new file mode 100644 index 0000000..d70f9c8 --- /dev/null +++ b/R/clean_case_vax_history.R @@ -0,0 +1,78 @@ +#' Cleans vaccination data from case data +#' +#' @description Cleans and un-nests vaccination history, where vaccination is +#' complete, from case data. Case data is returned from [`get_cases()`]. +#' +#' @param cases A tibble with address information from cases data. +#' +#' @return A tibble with cleaned and un-nested vaccination history data. +#' @export +#' +#' @examples +#' #' \dontrun{ +#' url <- "https://MyGoDataServer.com/" +#' username <- "myemail@email.com" +#' password <- "mypassword" +#' outbreak_id <- "3b5554d7-2c19-41d0-b9af-475ad25a382b" +#' +#' cases <- get_cases( +#' url = url, +#' username = username, +#' password = password, +#' outbreak_id = outbreak_id +#' ) +#' +#' vax_history <- clean_case_vax_history(cases = cases) +#' } +clean_case_vax_history <- function(cases) { + + cases_vacc_history_clean <- dplyr::filter( + .data = cases, + .data$deleted == FALSE | is.na(.data$deleted) + ) + + # cannot unnest on mix of data frames and lists so change empty lists to empty + # data frames + cases_vacc_history_clean$vaccinesReceived <- purrr::map( + cases_vacc_history_clean$vaccinesReceived, + .f = function(x) { + if (length(x) == 0) x <- data.frame() + x + } + ) + + cases_vacc_history_clean <- tidyr::unnest( + data = cases_vacc_history_clean, + "vaccinesReceived", + names_sep = "_" + ) + + cases_vacc_history_clean <- dplyr::select( + .data = cases_vacc_history_clean, + "id", + "visualId", + dplyr::starts_with("vaccinesReceived") + ) + + cases_vacc_history_clean <- dplyr::rename_with( + .data = cases_vacc_history_clean, + .fn = tolower + ) + + cases_vacc_history_clean <- dplyr::mutate( + .data = cases_vacc_history_clean, + vaccinesreceived_vaccine = sub(".*VACCINE_", "", vaccinesreceived_vaccine) + ) + + cases_vacc_history_clean <- dplyr::mutate( + .data = cases_vacc_history_clean, + vaccinesreceived_status = sub(".*STATUS_", "", vaccinesreceived_status) + ) + + cases_vacc_history_clean <- dplyr::mutate( + .data = cases_vacc_history_clean, + dplyr::across("vaccinesreceived_date", as.Date) + ) + + return(cases_vacc_history_clean) +} diff --git a/man/clean_case_vax_history.Rd b/man/clean_case_vax_history.Rd new file mode 100644 index 0000000..c72e851 --- /dev/null +++ b/man/clean_case_vax_history.Rd @@ -0,0 +1,35 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/clean_case_vax_history.R +\name{clean_case_vax_history} +\alias{clean_case_vax_history} +\title{Cleans vaccinations data from case data} +\usage{ +clean_case_vax_history(cases) +} +\arguments{ +\item{cases}{A tibble with address information from cases data.} +} +\value{ +A tibble with cleaned and un-nested vaccination history data. +} +\description{ +Cleans and un-nests vaccination history, where vaccination is +complete, from case data. Case data is returned from \code{\link[=get_cases]{get_cases()}}. +} +\examples{ +#' \dontrun{ +url <- "https://MyGoDataServer.com/" +username <- "myemail@email.com" +password <- "mypassword" +outbreak_id <- "3b5554d7-2c19-41d0-b9af-475ad25a382b" + +cases <- get_cases( + url = url, + username = username, + password = password, + outbreak_id = outbreak_id +) + +vax_history <- clean_case_vax_history(cases = cases) +} +} diff --git a/tests/testthat/test-clean_case_vax_history.R b/tests/testthat/test-clean_case_vax_history.R new file mode 100644 index 0000000..c2b9fc2 --- /dev/null +++ b/tests/testthat/test-clean_case_vax_history.R @@ -0,0 +1,26 @@ +test_that("clean_case_vax_history works as expected", { + skip("get_cases requires API call") + + res <- get_cases( + url = url, + username = username, + password = password, + outbreak_id = outbreak_id + ) + res <- clean_case_vax_history(cases = res) + + expect_s3_class(res, "tbl_df") + expect_s3_class(res, "data.frame") + expect_identical(dim(res), c(1L, 5L)) + expect_true( + all(c( + "id", "visualid", "vaccinesreceived_vaccine", "vaccinesreceived_status", + "vaccinesreceived_date" + ) %in% colnames(res)) + ) + + expect_identical( + unname(sapply(res[1, ], class)), + c("character", "character", "character", "character", "Date") + ) +}) From 8589d3a5cc5cc278d7c39ee9acf61a4ca7679f91 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Tue, 14 Mar 2023 15:42:52 +0000 Subject: [PATCH 154/203] added clean_cases function, documentation and test --- R/clean_cases.R | 245 ++++++++++++++++++++++++++++++ man/clean_cases.Rd | 50 ++++++ tests/testthat/test-clean_cases.R | 51 +++++++ 3 files changed, 346 insertions(+) create mode 100644 R/clean_cases.R create mode 100644 man/clean_cases.Rd create mode 100644 tests/testthat/test-clean_cases.R diff --git a/R/clean_cases.R b/R/clean_cases.R new file mode 100644 index 0000000..e99748c --- /dev/null +++ b/R/clean_cases.R @@ -0,0 +1,245 @@ +#' Cleans case data +#' +#' @description Cleans and un-nests case data. Case data is returned by +#' [`get_cases()`]. +#' +#' @param cases A `tibble` containing the case data. +#' +#' @return A `tibble` containing the cleaned case data. +#' @export +#' +#' @examples +#' \dontrun{ +#' url <- "https://MyGoDataServer.com/" +#' username <- "myemail@email.com" +#' password <- "mypassword" +#' outbreak_id <- "3b5554d7-2c19-41d0-b9af-475ad25a382b" +#' +#' cases <- get_cases( +#' url = url, +#' username = username, +#' password = password, +#' outbreak_id = outbreak_id +#' ) +#' +#' # other cleaned data required for `clean_cases()` +#' cases_vacc_history_clean <- clean_case_vax_history(cases = cases) +#' cases_address_history_clean <- clean_case_address_history(cases = cases) +#' cases_dateranges_history_clean <- clean_case_med_history(cases = cases) +#' +#' cases_clean <- clean_cases( +#' cases = cases, +#' cases_address_history_clean = cases_address_history_clean, +#' cases_vacc_history_clean = cases_vacc_history_clean, +#' cases_dateranges_history_clean = cases_dateranges_history_clean +#' ) +#' } +clean_cases <- function(cases, + cases_address_history_clean, + cases_vacc_history_clean, + cases_dateranges_history_clean) { + + # Remove all deleted records + cases_clean <- dplyr::filter( + .data = cases, + .data$deleted == FALSE | is.na(.data$deleted) + ) + + # Remove all nested fields, otherwise problems with exporting to excel + cases_clean <- dplyr::select_if( + .tbl = cases_clean, + purrr::negate(is.list) + ) + + # take out all that are not core variables, otherwise diff versions and + # problems exporting to excel + cases_clean <- dplyr::select( + .data = cases_clean, + -dplyr::contains("questionnaireAnswers") + ) + + # standardize column name syntax + cases_clean <- janitor::clean_names(cases_clean) + + # label timestamps as datetime + cases_clean <- dplyr::rename( + .data = cases_clean, + date_of_birth = "dob", + datetime_updated_at = "updated_at", + datetime_created_at = "created_at" + ) + + # take out other unnecessary vars that are unnecessary and may confuse + # (i.e. was_case for cases) + cases_clean <- dplyr::select( + .data = cases_clean, + -c("is_date_of_onset_approximate", + "is_date_of_reporting_approximate", + "was_case", + "deleted", + "created_on") + ) + + #clean up all blank fields + cases_clean <- dplyr::mutate( + .data = cases_clean, + dplyr::across(dplyr::where(is.character), dplyr::na_if, "") + ) + + # clean date formats (TODO: edit this so that we can see time stamps) + cases_clean <- dplyr::mutate_at( + .tbl = cases_clean, + dplyr::vars(dplyr::starts_with("date_")), list(~ as.Date(substr(., 1, 10))) + ) + cases_clean <- dplyr::mutate( + .data = cases_clean, + datetime_updated_at = as.POSIXct(datetime_updated_at, format = "%Y-%m-%dT%H:%M") + ) + cases_clean <- dplyr::mutate( + .data = cases_clean, + datetime_created_at = as.POSIXct(datetime_created_at,format="%Y-%m-%dT%H:%M") + ) + + # truncate responses of categorical vars so easier to read + cases_clean <- dplyr::mutate( + .data = cases_clean, + classification = sub(".*CLASSIFICATION_", "", classification), + gender = sub(".*GENDER_", "", gender), + occupation = sub(".*OCCUPATION_", "", occupation), + outcome = sub(".*OUTCOME_", "", outcome_id), + pregnancy_status = sub(".*STATUS_", "", pregnancy_status), + risk_level = sub(".*LEVEL_", "", risk_level) + ) + + cases_clean <- dplyr::mutate( + .data = cases_clean, + isolated = case_when(id %in% cases_dateranges_history_clean$id[cases_dateranges_history_clean$dateranges_typeid == "ISOLATION"] ~ TRUE, TRUE ~ FALSE) + ) + + cases_clean <- dplyr::mutate( + .data = cases_clean, + hospitalized = case_when(id %in% cases_dateranges_history_clean$id[cases_dateranges_history_clean$dateranges_typeid == "HOSPITALIZATION"] ~ TRUE, TRUE ~ FALSE) + ) + + cases_clean <- dplyr::mutate( + .data = cases_clean, + icu = case_when(id %in% cases_dateranges_history_clean$id[cases_dateranges_history_clean$dateranges_typeid == "ICU_ADMISSION"] ~ TRUE, TRUE ~ FALSE) + ) + + cases_address_history_clean <- dplyr::filter( + .data = cases_address_history_clean, + addresses_typeid == "USUAL_PLACE_OF_RESIDENCE" + ) + + # join in current address from address history, only current place of residence + cases_clean <- dplyr::left_join(x = cases_clean, y = cases_address_history_clean, by = "id") + + # join in info from vacc block + cases_clean <- dplyr::mutate( + .data = cases_clean, + vaccinated = dplyr::case_when(id %in% cases_vacc_history_clean$id[cases_vacc_history_clean$vaccinesreceived_status == "VACCINATED"] ~ TRUE, TRUE ~ FALSE) + ) + + # force NA ages to appear as NA, not as 0 like sometimes occurs + cases_clean <- dplyr::mutate( + .data = cases_clean, + age_years = as.numeric(age_years) + ) + cases_clean <- dplyr::mutate( + .data = cases_clean, + age_years = dplyr::na_if(age_years, 0) + ) + cases_clean <- dplyr::mutate( + .data = cases_clean, + age_months = as.numeric(age_months) + ) + cases_clean <- dplyr::mutate( + .data = cases_clean, + age_months = dplyr::na_if(age_months, 0) + ) + + # standardize age vars into just one var, round by 1 decimal + cases_clean <- dplyr::mutate( + .data = cases_clean, + age = dplyr::case_when(!is.na(age_months) ~ round(age_months / 12, digits = 1), TRUE ~ age_years) + ) + + # WHO age categories updated Sept 2020: + # 0-4, 5-9, 10-14, 15-19, 20-29, 30-39, 40-49, 50-59, 60-64, 65-69, 70-74, + # 75-79, 80+ + # these categories below match that of detailed WHO surveillance dash: + # <5, 5-14, 15-24, 25-64, 65+ + cases_clean <- dplyr::mutate( + .data = cases_clean, + age_class = factor( + case_when( + age <= 4 ~ "<5", + age <= 14 ~ "5-14", + age <= 24 ~ "15-24", + age <= 64 ~ "25-64", + is.finite(age) ~ "65+", + TRUE ~ "unknown" + ), + levels = c( + "<5", + "5-14", + "15-24", + "25-64", + "65+", + "unknown" + ) + ), + age_class = factor( + age_class, + levels = rev(levels(age_class)) + ) + ) + + # organize order of vars, only bring in what we need, take away confusing vars + cases_clean <- dplyr::select( + .data = cases_clean, + id, # identifier + visual_id, # identifier + classification, # identifier + first_name, # demographics + middle_name, # demographics + last_name, # demographics + gender, # demographics + age, # demographics + age_class, # demographics + occupation, # demographics + pregnancy_status, # demographics + date_of_reporting, # dates + date_of_onset, # dates + date_of_infection, # dates + date_become_case, # dates + date_of_burial, # dates + was_contact, # epi + risk_level, # epi + risk_reason, # epi + safe_burial, # epi + transfer_refused, # epi + responsible_user_id, # assigned contact tracer + matches("^admin_.*name$"), # address + lat, # address + long, # address + address, # address + postal_code, # address + city, # address + telephone, # address + email, # address + vaccinated, # vaccination & dateRanges block + isolated, # vaccination & dateRanges block + hospitalized, # vaccination & dateRanges block + icu, # vaccination & dateRanges block + outcome, # outcome + date_of_outcome, # outcome + location_id = addresses_locationid, # uuid in case need later for joining of whatever sort + created_by, # record modification + datetime_created_at, # record modification + updated_by, # record modification + datetime_updated_at # record modification + ) + + return(cases_clean) +} diff --git a/man/clean_cases.Rd b/man/clean_cases.Rd new file mode 100644 index 0000000..96b9a54 --- /dev/null +++ b/man/clean_cases.Rd @@ -0,0 +1,50 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/clean_cases.R +\name{clean_cases} +\alias{clean_cases} +\title{Cleans case data} +\usage{ +clean_cases( + cases, + cases_address_history_clean, + cases_vacc_history_clean, + cases_dateranges_history_clean +) +} +\arguments{ +\item{cases}{A \code{tibble} containing the case data.} +} +\value{ +A \code{tibble} containing the cleaned case data. +} +\description{ +Cleans and un-nests case data. Case data is returned by +\code{\link[=get_cases]{get_cases()}}. +} +\examples{ +\dontrun{ +url <- "https://MyGoDataServer.com/" +username <- "myemail@email.com" +password <- "mypassword" +outbreak_id <- "3b5554d7-2c19-41d0-b9af-475ad25a382b" + +cases <- get_cases( + url = url, + username = username, + password = password, + outbreak_id = outbreak_id +) + +# other cleaned data required for `clean_cases()` +cases_vacc_history_clean <- clean_case_vax_history(cases = cases) +cases_address_history_clean <- clean_case_address_history(cases = cases) +cases_dateranges_history_clean <- clean_case_med_history(cases = cases) + +cases_clean <- clean_cases( + cases = cases, + cases_address_history_clean = cases_address_history_clean, + cases_vacc_history_clean = cases_vacc_history_clean, + cases_dateranges_history_clean = cases_dateranges_history_clean +) +} +} diff --git a/tests/testthat/test-clean_cases.R b/tests/testthat/test-clean_cases.R new file mode 100644 index 0000000..a29ce82 --- /dev/null +++ b/tests/testthat/test-clean_cases.R @@ -0,0 +1,51 @@ +test_that("clean_cases works as expected", { + skip("get_cases requires API call") + + cases <- get_cases( + url = url, + username = username, + password = password, + outbreak_id = outbreak_id + ) + + cases_vacc_history_clean <- clean_case_vax_history(cases = cases) + cases_address_history_clean <- clean_case_address_history(cases = cases) + cases_dateranges_history_clean <- clean_case_med_history(cases = cases) + + res <- clean_cases( + cases = cases, + cases_address_history_clean = cases_address_history_clean, + cases_vacc_history_clean = cases_vacc_history_clean, + cases_dateranges_history_clean = cases_dateranges_history_clean + ) + + expect_s3_class(res, "tbl_df") + expect_s3_class(res, "data.frame") + expect_identical(dim(res), c(14L, 43L)) + expect_true( + all(c( + "id", "visual_id", "classification", "first_name", "middle_name", + "last_name", "gender", "age", "age_class", "occupation", + "pregnancy_status", "date_of_reporting", "date_of_onset", + "date_of_infection", "date_become_case", "date_of_burial", "was_contact", + "risk_level", "risk_reason", "safe_burial", "transfer_refused", + "responsible_user_id", "admin_0_name", "admin_1_name", "admin_2_name", + "lat", "long", "address", "postal_code", "city", "telephone", "email", + "vaccinated", "isolated", "hospitalized", "icu", "outcome", + "date_of_outcome", "location_id", "created_by", "datetime_created_at", + "updated_by", "datetime_updated_at" + ) %in% colnames(res)) + ) + + expect_identical( + unname(sapply(res[1, ], function(x) class(x)[1])), + c( + rep("character", 7), "numeric", "factor", "character", "character", + rep("Date", 5), "logical", "character", "character", "logical", "logical", + "character", "character", "character", "character", "numeric", "numeric", + "character", "character", "character", "character", "character", + "logical", "logical", "logical", "logical", "character", "Date", + "character", "character", "POSIXct", "character", "POSIXct" + ) + ) +}) From 5bcb9dbba8aac9899f2787d3c9a7b68c75f8be72 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Tue, 14 Mar 2023 16:24:27 +0000 Subject: [PATCH 155/203] added cases_from_contacts function, documentation and test --- R/cases_from_contacts.R | 106 ++++++++++++++++++++++ man/cases_from_contacts.Rd | 47 ++++++++++ tests/testthat/test-cases_from_contacts.R | 53 +++++++++++ 3 files changed, 206 insertions(+) create mode 100644 R/cases_from_contacts.R create mode 100644 man/cases_from_contacts.Rd create mode 100644 tests/testthat/test-cases_from_contacts.R diff --git a/R/cases_from_contacts.R b/R/cases_from_contacts.R new file mode 100644 index 0000000..7cd9fdb --- /dev/null +++ b/R/cases_from_contacts.R @@ -0,0 +1,106 @@ +#' Pull out all cases that used to be contacts +#' +#' @param cases_clean The cleaned case data. Case data is returned by +#' [`get_cases()`] and cleaned by [`clean_cases()`]. +#' +#' @return A tibble containing the cases that used to be contacts. +#' @export +#' +#' @examples +#' \dontrun{ +#' url <- "https://MyGoDataServer.com/" +#' username <- "myemail@email.com" +#' password <- "mypassword" +#' outbreak_id <- "3b5554d7-2c19-41d0-b9af-475ad25a382b" +#' +#' cases <- get_cases( +#' url = url, +#' username = username, +#' password = password, +#' outbreak_id = outbreak_id +#' ) +#' +#' # other cleaned data required for `clean_cases()` +#' cases_vacc_history_clean <- clean_case_vax_history(cases = cases) +#' cases_address_history_clean <- clean_case_address_history(cases = cases) +#' cases_dateranges_history_clean <- clean_case_med_history(cases = cases) +#' +#' cases_clean <- clean_cases( +#' cases = cases, +#' cases_address_history_clean = cases_address_history_clean, +#' cases_vacc_history_clean = cases_vacc_history_clean, +#' cases_dateranges_history_clean = cases_dateranges_history_clean +#' ) +#' +#' cases_from_contacts <- cases_from_contacts(cases_clean = cases_clean) +#' } +cases_from_contacts <- function(cases_clean) { + + contacts_becoming_cases <- dplyr::filter( + .data = cases_clean, + .data$was_contact == TRUE + ) + + # set this status to became case and no longer active + contacts_becoming_cases <- dplyr::mutate( + .data = contacts_becoming_cases, + follow_up_status = "BECAME_CASE", + was_case = NA, + date_of_last_contact = NA, + follow_up_team_id = NA, + relationship_exposure_type = NA, + relationship_context_of_transmission = NA, + relationship_exposure_duration = NA, + relationship_exposure_frequency = NA, + relationship_certainty_level = NA, + relationship_cluster_id = NA, + ) + + # organize order of vars, only bring in what we need, take away confusing vars + contacts_becoming_cases <- dplyr::select( + .data = contacts_becoming_cases, + "id", # identifier + "visual_id", # identifier + "classification", # identifier + "follow_up_status", # identifier + "first_name", # demographics + "middle_name", # demographics + "last_name", # demographics + "gender", # demographics + "age", # demographics + "age_class", # demographics + "occupation", # demographics + "pregnancy_status", # demographics + "date_of_reporting", # dates + "date_of_last_contact", # dates + "date_of_burial", # dates + "risk_level", # epi + "risk_reason", # epi + "responsible_user_id", # assigned contact tracer + "follow_up_team_id", # assigned contact tracer + dplyr::matches("^admin_.*name$"), # address + "lat", # address + "long", # address + "address", # address + "postal_code", # address + "city", # address + "telephone", # address + "email", # address + "vaccinated", + "outcome", # outcome + "date_of_outcome", # outcome + "relationship_exposure_type", + "relationship_context_of_transmission", + "relationship_exposure_duration", + "relationship_exposure_frequency", + "relationship_certainty_level", + "relationship_cluster_id", + "location_id", # uuid in case need later for joining of whatever sort + "created_by", # record modification + "datetime_created_at", # record modification + "updated_by", # record modification + "datetime_updated_at" # record modification + ) + + return(contacts_becoming_cases) +} diff --git a/man/cases_from_contacts.Rd b/man/cases_from_contacts.Rd new file mode 100644 index 0000000..e9a91d6 --- /dev/null +++ b/man/cases_from_contacts.Rd @@ -0,0 +1,47 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/cases_from_contacts.R +\name{cases_from_contacts} +\alias{cases_from_contacts} +\title{Pull out all cases that used to be contacts} +\usage{ +cases_from_contacts(cases_clean) +} +\arguments{ +\item{cases_clean}{The cleaned case data. Case data is returned by +\code{\link[=get_cases]{get_cases()}} and cleaned by \code{\link[=clean_cases]{clean_cases()}}.} +} +\value{ +A tibble containing the cases that used to be contacts. +} +\description{ +Pull out all cases that used to be contacts +} +\examples{ +\dontrun{ +url <- "https://MyGoDataServer.com/" +username <- "myemail@email.com" +password <- "mypassword" +outbreak_id <- "3b5554d7-2c19-41d0-b9af-475ad25a382b" + +cases <- get_cases( + url = url, + username = username, + password = password, + outbreak_id = outbreak_id +) + +# other cleaned data required for `clean_cases()` +cases_vacc_history_clean <- clean_case_vax_history(cases = cases) +cases_address_history_clean <- clean_case_address_history(cases = cases) +cases_dateranges_history_clean <- clean_case_med_history(cases = cases) + +cases_clean <- clean_cases( + cases = cases, + cases_address_history_clean = cases_address_history_clean, + cases_vacc_history_clean = cases_vacc_history_clean, + cases_dateranges_history_clean = cases_dateranges_history_clean +) + +cases_from_contacts <- cases_from_contacts(cases_clean = cases_clean) +} +} diff --git a/tests/testthat/test-cases_from_contacts.R b/tests/testthat/test-cases_from_contacts.R new file mode 100644 index 0000000..f5c3570 --- /dev/null +++ b/tests/testthat/test-cases_from_contacts.R @@ -0,0 +1,53 @@ +test_that("cases_from_contacts works as expected", { + skip("get_cases requires API call") + + cases <- get_cases( + url = url, + username = username, + password = password, + outbreak_id = outbreak_id + ) + + cases_vacc_history_clean <- clean_case_vax_history(cases = cases) + cases_address_history_clean <- clean_case_address_history(cases = cases) + cases_dateranges_history_clean <- clean_case_med_history(cases = cases) + + cases_clean <- clean_cases( + cases = cases, + cases_address_history_clean = cases_address_history_clean, + cases_vacc_history_clean = cases_vacc_history_clean, + cases_dateranges_history_clean = cases_dateranges_history_clean + ) + + res <- cases_from_contacts(cases_clean = cases_clean) + + expect_s3_class(res, "tbl_df") + expect_s3_class(res, "data.frame") + expect_identical(dim(res), c(3L, 43L)) + expect_true( + all(c( + "id", "visual_id", "classification", "follow_up_status", "first_name", + "middle_name", "last_name", "gender", "age", "age_class", "occupation", + "pregnancy_status", "date_of_reporting", "date_of_last_contact", + "date_of_burial", "risk_level", "risk_reason", "responsible_user_id", + "follow_up_team_id", "admin_0_name", "admin_1_name", "admin_2_name", + "lat", "long", "address", "postal_code", "city", "telephone", "email", + "vaccinated", "outcome", "date_of_outcome", "relationship_exposure_type", + "relationship_context_of_transmission", "relationship_exposure_duration", + "relationship_exposure_frequency", "relationship_certainty_level", + "relationship_cluster_id", "location_id", "created_by", + "datetime_created_at", "updated_by", "datetime_updated_at" + ) %in% colnames(res)) + ) + + expect_identical( + unname(sapply(res[1, ], function(x) class(x)[1])), + c( + rep("character", 8), "numeric", "factor", "character", "character", + "Date", "logical", "Date", "character", "character", "character", + "logical", "character", "character", "character", "numeric", "numeric", + rep("character", 5), "logical", "character", "Date", rep("logical", 6), + "character", "character", "POSIXct", "character", "POSIXct" + ) + ) +}) From d1974e3aed0974d1e17b5b298b5d7afbf69ce28b Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Tue, 14 Mar 2023 17:19:13 +0000 Subject: [PATCH 156/203] added clean_contact_address_history function, documentation and test --- R/clean_contact_address_history.R | 107 ++++++++++++++++++ man/clean_contact_address_history.Rd | 50 ++++++++ .../test-clean_contact_address_history.R | 42 +++++++ 3 files changed, 199 insertions(+) create mode 100644 R/clean_contact_address_history.R create mode 100644 man/clean_contact_address_history.Rd create mode 100644 tests/testthat/test-clean_contact_address_history.R diff --git a/R/clean_contact_address_history.R b/R/clean_contact_address_history.R new file mode 100644 index 0000000..15222b2 --- /dev/null +++ b/R/clean_contact_address_history.R @@ -0,0 +1,107 @@ +#' Extracts address information from contact data +#' +#' @description This function un-nests and cleans the address data and stores +#' it in a standalone table with all addresses, even if there is more than 1 +#' per person. +#' +#' @param contacts A tibble with contacts data. Contacts data is returned by +#' [`get_contacts()`]. +#' @param locations_clean A tibble with cleaned locations data. Locations data +#' is returned by [`get_locations()`] and cleaned by [`clean_locations()`]. +#' +#' @return A tibble with address information from contacts data. +#' @export +#' +#' @examples +#' \dontrun{ +#' url <- "https://MyGoDataServer.com/" +#' username <- "myemail@email.com" +#' password <- "mypassword" +#' outbreak_id <- "3b5554d7-2c19-41d0-b9af-475ad25a382b" +#' +#' contacts <- get_contacts( +#' url = url, +#' username = username, +#' password = password, +#' outbreak_id = outbreak_id +#' ) +#' +#' locations <- get_locations( +#' url = url, +#' username = username, +#' password = password +#' ) +#' locations_clean <- clean_locations(locations = locations) +#' +#' contact_address_history <- clean_contact_address_history( +#' contacts = contacts, +#' locations_clean = locations_clean +#' ) +#' } +clean_contact_address_history <- function(contacts, + locations_clean) { + + contacts_address_history_clean <- dplyr::filter( + .data = contacts, + .data$deleted == FALSE | is.na(.data$deleted) + ) + + contacts_address_history_clean <- dplyr::select( + .data = contacts_address_history_clean, + "id", "visualId", "addresses" + ) + + contacts_address_history_clean <- tidyr::unnest( + data = contacts_address_history_clean, + cols = "addresses", + names_sep = "_" + ) + + contacts_address_history_clean <- dplyr::select_all( + .tbl = contacts_address_history_clean, + .funs = ~gsub("\\.", "_", tolower(.)) + ) + + contacts_address_history_clean <- dplyr::select_if( + .tbl = contacts_address_history_clean, + purrr::negate(is.list) + ) + + contacts_address_history_clean <- dplyr::mutate( + .data = contacts_address_history_clean, + addresses_typeid = sub(".*TYPE_", "", addresses_typeid) + ) + + contacts_address_history_clean <- dplyr::left_join( + x = contacts_address_history_clean, + y = locations_clean, + by = c("addresses_locationid" = "location_id") + ) + + # bring in GPS from locations if blank in contact record, otherwise use + # contact address block + contacts_address_history_clean <- dplyr::mutate( + .data = contacts_address_history_clean, + lat = dplyr::case_when( + is.na(addresses_geolocation_lat) ~ lat, TRUE ~ addresses_geolocation_lat), + long = dplyr::case_when( + is.na(addresses_geolocation_lng) ~ lat, TRUE ~ addresses_geolocation_lng) + ) + + contacts_address_history_clean <- dplyr::select( + .data = contacts_address_history_clean, + "id", + "addresses_locationid", + "addresses_typeid", + "lat", + "long", + "address" = addresses_addressline1, + "postal_code" = addresses_postalcode, + "city" = addresses_city, + "telephone" = addresses_phonenumber, + "email" = addresses_emailaddress, + dplyr::matches("^admin_.*name$") + ) + + return(contacts_address_history_clean) +} diff --git a/man/clean_contact_address_history.Rd b/man/clean_contact_address_history.Rd new file mode 100644 index 0000000..1209f5a --- /dev/null +++ b/man/clean_contact_address_history.Rd @@ -0,0 +1,50 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/clean_contact_address_history.R +\name{clean_contact_address_history} +\alias{clean_contact_address_history} +\title{Extracts address information from contact data} +\usage{ +clean_contact_address_history(contacts, locations_clean) +} +\arguments{ +\item{contacts}{A tibble with contacts data. Contacts data is returned by +\code{\link[=get_contacts]{get_contacts()}}.} + +\item{locations_clean}{A tibble with cleaned locations data. Locations data +is returned by \code{\link[=get_locations]{get_locations()}} and cleaned by \code{\link[=clean_locations]{clean_locations()}}.} +} +\value{ +A tibble with address information from contacts data. +} +\description{ +This function un-nests and cleans the address data and stores +it in a standalone table with all addresses, even if there is more than 1 +per person. +} +\examples{ +\dontrun{ +url <- "https://MyGoDataServer.com/" +username <- "myemail@email.com" +password <- "mypassword" +outbreak_id <- "3b5554d7-2c19-41d0-b9af-475ad25a382b" + +contacts <- get_contacts( + url = url, + username = username, + password = password, + outbreak_id = outbreak_id +) + +locations <- get_locations( + url = url, + username = username, + password = password +) +locations_clean <- clean_locations(locations = locations) + +contact_address_history <- clean_contact_address_history( + contacts = contacts, + locations_clean = locations_clean +) +} +} diff --git a/tests/testthat/test-clean_contact_address_history.R b/tests/testthat/test-clean_contact_address_history.R new file mode 100644 index 0000000..50930ab --- /dev/null +++ b/tests/testthat/test-clean_contact_address_history.R @@ -0,0 +1,42 @@ +test_that("clean_contact_address_history works as expected", { + skip("get_contacts requires API call") + + contacts <- get_contacts( + url = url, + username = username, + password = password, + outbreak_id = outbreak_id + ) + + locations <- get_locations( + url = url, + username = username, + password = password + ) + locations_clean <- clean_locations(locations = locations) + + res <- clean_contact_address_history( + contacts = contacts, + locations_clean = locations_clean + ) + + expect_s3_class(res, "tbl_df") + expect_s3_class(res, "data.frame") + expect_identical(dim(res), c(15L, 13L)) + expect_true( + all(c( + "id", "addresses_locationid", "addresses_typeid", "lat", "long", + "address", "postal_code", "city", "telephone", "email", "admin_0_name", + "admin_1_name", "admin_2_name" + ) %in% colnames(res)) + ) + + expect_identical( + unname(sapply(res[1, ], class)), + c( + "character", "character", "character", "numeric", "numeric", + "character", "logical", "character", "character", "character", + "character", "character", "character" + ) + ) +}) From 19ddad5d7b473a4b37512075809dbae9f302cef0 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Tue, 14 Mar 2023 18:01:09 +0000 Subject: [PATCH 157/203] added clean_contact_vax_history function, documentation and test --- R/clean_contact_vax_history.R | 77 +++++++++++++++++++ man/clean_contact_vax_history.Rd | 36 +++++++++ .../testthat/test-clean_contact_vax_history.R | 27 +++++++ 3 files changed, 140 insertions(+) create mode 100644 R/clean_contact_vax_history.R create mode 100644 man/clean_contact_vax_history.Rd create mode 100644 tests/testthat/test-clean_contact_vax_history.R diff --git a/R/clean_contact_vax_history.R b/R/clean_contact_vax_history.R new file mode 100644 index 0000000..32ea9da --- /dev/null +++ b/R/clean_contact_vax_history.R @@ -0,0 +1,77 @@ +#' Cleans vaccination data from contact data +#' +#' @description Cleans and un-nests vaccination history, where vaccination is +#' complete, from contact data. Contact data is returned from +#' [`get_contacts()`]. +#' +#' @param contacts A tibble with address information from contact data. +#' +#' @return A tibble with cleaned and un-nested vaccination history data. +#' @export +#' +#' @examples +#' \dontrun{ +#' url <- "https://MyGoDataServer.com/" +#' username <- "myemail@email.com" +#' password <- "mypassword" +#' outbreak_id <- "3b5554d7-2c19-41d0-b9af-475ad25a382b" +#' +#' contacts <- get_contacts( +#' url = url, +#' username = username, +#' password = password, +#' outbreak_id = outbreak_id +#' ) +#' +#' vax_history <- clean_contact_vax_history(contacts = contacts) +#' } +clean_contact_vax_history <- function(contacts) { + + contacts_vax_history_clean <- dplyr::filter( + .data = contacts, + .data$deleted == FALSE | is.na(.data$deleted) + ) + + # cannot unnest on mix of data frames and lists so change empty lists to empty + # data frames + contacts_vax_history_clean$vaccinesReceived <- purrr::map( + contacts_vax_history_clean$vaccinesReceived, + .f = function(x) { + if (length(x) == 0) x <- data.frame() + x + } + ) + + contacts_vax_history_clean <- tidyr::unnest( + data = contacts_vax_history_clean, + cols = "vaccinesReceived", + names_sep = "_" + ) + + contacts_vax_history_clean <- dplyr::select_at( + .tbl = contacts_vax_history_clean, + dplyr::vars( + "id", + "visualId", + dplyr::starts_with("vaccinesReceived") + ), + .funs = tolower + ) + + contacts_vax_history_clean <- dplyr::mutate( + .data = contacts_vax_history_clean, + vaccinesreceived_vaccine = sub(".*VACCINE_", "", vaccinesreceived_vaccine) + ) + + contacts_vax_history_clean <- dplyr::mutate( + .data = contacts_vax_history_clean, + vaccinesreceived_status = sub(".*STATUS_", "", vaccinesreceived_status) + ) + + contacts_vax_history_clean <- dplyr::mutate_at( + .tbl = contacts_vax_history_clean, + dplyr::vars(vaccinesreceived_date), as.Date + ) + + return(contacts_vax_history_clean) +} diff --git a/man/clean_contact_vax_history.Rd b/man/clean_contact_vax_history.Rd new file mode 100644 index 0000000..a24cd21 --- /dev/null +++ b/man/clean_contact_vax_history.Rd @@ -0,0 +1,36 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/clean_contact_vax_history.R +\name{clean_contact_vax_history} +\alias{clean_contact_vax_history} +\title{Cleans vaccination data from contact data} +\usage{ +clean_contact_vax_history(contacts) +} +\arguments{ +\item{contacts}{A tibble with address information from contact data.} +} +\value{ +A tibble with cleaned and un-nested vaccination history data. +} +\description{ +Cleans and un-nests vaccination history, where vaccination is +complete, from contact data. Contact data is returned from +\code{\link[=get_contacts]{get_contacts()}}. +} +\examples{ +\dontrun{ +url <- "https://MyGoDataServer.com/" +username <- "myemail@email.com" +password <- "mypassword" +outbreak_id <- "3b5554d7-2c19-41d0-b9af-475ad25a382b" + +contacts <- get_contacts( + url = url, + username = username, + password = password, + outbreak_id = outbreak_id +) + +vax_history <- clean_contact_vax_history(contacts = contacts) +} +} diff --git a/tests/testthat/test-clean_contact_vax_history.R b/tests/testthat/test-clean_contact_vax_history.R new file mode 100644 index 0000000..3b53f8e --- /dev/null +++ b/tests/testthat/test-clean_contact_vax_history.R @@ -0,0 +1,27 @@ +test_that("clean_contact_vax_history works as expected", { + skip("get_contacts requires API call") + + contacts <- get_contacts( + url = url, + username = username, + password = password, + outbreak_id = outbreak_id + ) + + res <- clean_contact_vax_history(contacts = contacts) + + expect_s3_class(res, "tbl_df") + expect_s3_class(res, "data.frame") + expect_identical(dim(res), c(1L, 5L)) + expect_true( + all(c( + "id", "visualid", "vaccinesreceived_vaccine", "vaccinesreceived_date", + "vaccinesreceived_status" + ) %in% colnames(res)) + ) + + expect_identical( + unname(sapply(res[1, ], class)), + c("character", "character", "character", "Date", "character") + ) +}) From aff39432d13f014100e3ac58aac7eb8bcd070a2a Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Wed, 15 Mar 2023 14:32:36 +0000 Subject: [PATCH 158/203] added clean_contacts_of_contacts_address_history function, documentation and test --- ...ean_contacts_of_contacts_address_history.R | 107 ++++++++++++++++++ ...an_contacts_of_contacts_address_history.Rd | 53 +++++++++ ...ean_contacts_of_contacts_address_history.R | 41 +++++++ 3 files changed, 201 insertions(+) create mode 100644 R/clean_contacts_of_contacts_address_history.R create mode 100644 man/clean_contacts_of_contacts_address_history.Rd create mode 100644 tests/testthat/test-clean_contacts_of_contacts_address_history.R diff --git a/R/clean_contacts_of_contacts_address_history.R b/R/clean_contacts_of_contacts_address_history.R new file mode 100644 index 0000000..b749a71 --- /dev/null +++ b/R/clean_contacts_of_contacts_address_history.R @@ -0,0 +1,107 @@ +#' Extracts address information from contacts of contacts data +#' +#' @description This function un-nests and cleans the address data and stores +#' it in a standalone table with all addresses, even if there is more than 1 +#' per person. +#' +#' @param contacts_of_contacts A`tibble` with contacts of contacts data. +#' Contacts of contacts data is returned by [`get_contacts_of_contacts()`]. +#' @param locations_clean A `tibble` with cleaned location data. Location data +#' is returned by [`get_locations()`] and cleaned by [`clean_locations()`]. +#' +#' @return A `tibble` with address information from contacts of contacts data. +#' @export +#' +#' @examples +#' \dontrun{ +#' url <- "https://MyGoDataServer.com/" +#' username <- "myemail@email.com" +#' password <- "mypassword" +#' outbreak_id <- "3b5554d7-2c19-41d0-b9af-475ad25a382b" +#' +#' contacts_of_contacts <- get_contacts_of_contacts( +#' url = url, +#' username = username, +#' password = password, +#' outbreak_id = outbreak_id +#' ) +#' +#' locations <- get_locations( +#' url = url, +#' username = username, +#' password = password +#' ) +#' locations_clean <- clean_locations(locations = locations) +#' +#' contact_of_contacts_add_hist <- clean_contacts_of_contacts_address_history( +#' contacts_of_contacts = contacts_of_contacts, +#' locations_clean = locations_clean +#' ) +#' } +clean_contacts_of_contacts_address_history <- function(contacts_of_contacts, + locations_clean) { + + coc_add_hist <- dplyr::filter( + .data = contacts_of_contacts, + .data$deleted == FALSE | is.na(.data$deleted) + ) + + coc_add_hist <- dplyr::select( + .data = coc_add_hist, + "id", "visualId", "addresses" + ) + + coc_add_hist <- tidyr::unnest( + data = coc_add_hist, + "addresses", + names_sep = "_" + ) + + coc_add_hist <- dplyr::select_all( + .tbl = coc_add_hist, + .funs = ~gsub("\\.", "_", tolower(.)) + ) + + coc_add_hist <- dplyr::select_if( + .tbl = coc_add_hist, + .predicate = purrr::negate(is.list) + ) + + coc_add_hist <- dplyr::mutate( + .data = coc_add_hist, + addresses_typeid = sub(".*TYPE_", "", addresses_typeid) + ) + + coc_add_hist <- dplyr::left_join( + x = coc_add_hist, + y = locations_clean, + by = c("addresses_locationid" = "location_id") + ) + + # bring in GPS from locations if blank in contact record, otherwise use + # contact address block + coc_add_hist <- dplyr::mutate( + .data = coc_add_hist, + lat = case_when( + is.na(addresses_geolocation_lat) ~ lat, TRUE ~ addresses_geolocation_lat), + long = case_when( + is.na(addresses_geolocation_lng) ~ lat, TRUE ~ addresses_geolocation_lng) + ) + + coc_add_hist <- dplyr::select( + .data = coc_add_hist, + "id", + "addresses_locationid", + "addresses_typeid", + "lat", + "long", + address = "addresses_addressline1", + postal_code = "addresses_postalcode", + city = "addresses_city", + telephone = "addresses_phonenumber", + email = "addresses_emailaddress", + dplyr::matches("^admin_.*name$") + ) + + return(coc_add_hist) +} diff --git a/man/clean_contacts_of_contacts_address_history.Rd b/man/clean_contacts_of_contacts_address_history.Rd new file mode 100644 index 0000000..a4923b4 --- /dev/null +++ b/man/clean_contacts_of_contacts_address_history.Rd @@ -0,0 +1,53 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/clean_contacts_of_contacts_address_history.R +\name{clean_contacts_of_contacts_address_history} +\alias{clean_contacts_of_contacts_address_history} +\title{Extracts address information from contacts of contacts data} +\usage{ +clean_contacts_of_contacts_address_history( + contacts_of_contacts, + locations_clean +) +} +\arguments{ +\item{contacts_of_contacts}{A\code{tibble} with contacts of contacts data. +Contacts of contacts data is returned by \code{\link[=get_contacts_of_contacts]{get_contacts_of_contacts()}}.} + +\item{locations_clean}{A \code{tibble} with cleaned location data. Location data +is returned by \code{\link[=get_locations]{get_locations()}} and cleaned by \code{\link[=clean_locations]{clean_locations()}}.} +} +\value{ +A \code{tibble} with address information from contacts of contacts data. +} +\description{ +This function un-nests and cleans the address data and stores +it in a standalone table with all addresses, even if there is more than 1 +per person. +} +\examples{ +\dontrun{ +url <- "https://MyGoDataServer.com/" +username <- "myemail@email.com" +password <- "mypassword" +outbreak_id <- "3b5554d7-2c19-41d0-b9af-475ad25a382b" + +contacts_of_contacts <- get_contacts_of_contacts( + url = url, + username = username, + password = password, + outbreak_id = outbreak_id +) + +locations <- get_locations( + url = url, + username = username, + password = password +) +locations_clean <- clean_locations(locations = locations) + +contact_of_contacts_address_history <- clean_contacts_of_contacts_address_history( + contacts_of_contacts = contacts_of_contacts, + locations_clean = locations_clean +) +} +} diff --git a/tests/testthat/test-clean_contacts_of_contacts_address_history.R b/tests/testthat/test-clean_contacts_of_contacts_address_history.R new file mode 100644 index 0000000..c8a0589 --- /dev/null +++ b/tests/testthat/test-clean_contacts_of_contacts_address_history.R @@ -0,0 +1,41 @@ +test_that("clean_contacts_of_contacts_address_history works as expected", { + skip("get_contacts_of_contacts requires API call") + + contacts_of_contacts <- get_contacts_of_contacts( + url = url, + username = username, + password = password, + outbreak_id = outbreak_id + ) + + locations <- get_locations( + url = url, + username = username, + password = password + ) + locations_clean <- clean_locations(locations = locations) + + res <- clean_contacts_of_contacts_address_history( + contacts_of_contacts = contacts_of_contacts, + locations_clean = locations_clean + ) + + expect_s3_class(res, "tbl_df") + expect_s3_class(res, "data.frame") + expect_identical(dim(res), c(1L, 13L)) + expect_true( + all(c( + "id", "addresses_locationid", "addresses_typeid", "lat", "long", + "address", "postal_code", "city", "telephone", "email", "admin_0_name", + "admin_1_name", "admin_2_name" + ) %in% colnames(res)) + ) + + expect_identical( + unname(sapply(res[1, ], class)), + c( + "character", "character", "character", "numeric", "numeric", + rep("logical", 5), "character", "character", "character" + ) + ) +}) From 6f70b4deab73ea92d4e61e7860b24779cb3811d5 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Wed, 15 Mar 2023 14:51:24 +0000 Subject: [PATCH 159/203] added clean_contacts_of_contacts_vax_history function, documentation and test --- R/clean_contacts_of_contacts_vax_history.R | 67 +++++++++++++++++++ man/clean_contacts_of_contacts_vax_history.Rd | 39 +++++++++++ ...t-clean_contacts_of_contacts_vax_history.R | 29 ++++++++ 3 files changed, 135 insertions(+) create mode 100644 R/clean_contacts_of_contacts_vax_history.R create mode 100644 man/clean_contacts_of_contacts_vax_history.Rd create mode 100644 tests/testthat/test-clean_contacts_of_contacts_vax_history.R diff --git a/R/clean_contacts_of_contacts_vax_history.R b/R/clean_contacts_of_contacts_vax_history.R new file mode 100644 index 0000000..44e75c7 --- /dev/null +++ b/R/clean_contacts_of_contacts_vax_history.R @@ -0,0 +1,67 @@ +#' Cleans vaccination data from contacts of contacts data +#' +#' @description Cleans and un-nests vaccination history, where vaccination is +#' complete, from contacts of contacts data. Contacts of contacts data is +#' returned from [`get_contacts_of_contacts()`]. +#' +#' @param contacts_of_contacts A `tibble` with address information from contacts +#' of contacts data. +#' +#' @return A `tibble` with cleaned and un-nested vaccination history data. +#' @export +#' +#' @examples +#' \dontrun{ +#' url <- "https://MyGoDataServer.com/" +#' username <- "myemail@email.com" +#' password <- "mypassword" +#' outbreak_id <- "3b5554d7-2c19-41d0-b9af-475ad25a382b" +#' +#' contacts_of_contacts <- get_contacts_of_contacts( +#' url = url, +#' username = username, +#' password = password, +#' outbreak_id = outbreak_id +#' ) +#' +#' vax_history <- clean_contacts_of_contacts_vax_history( +#' contacts_of_contacts = contacts_of_contacts +#' ) +#' } +clean_contacts_of_contacts_vax_history <- function(contacts_of_contacts) { + + coc_vacc_hist <- dplyr::filter( + .data = contacts_of_contacts, + .data$deleted == FALSE | is.na(.data$deleted) + ) + + coc_vacc_hist <- tidyr::unnest( + data = coc_vacc_hist, + cols = "vaccinesReceived", + names_sep = "_" + ) + + coc_vacc_hist <- dplyr::select_at( + .tbl = coc_vacc_hist, + .vars = dplyr::vars(id, visualId, dplyr::starts_with("vaccinesReceived")), + tolower + ) + + coc_vacc_hist <- dplyr::mutate( + .data = coc_vacc_hist, + vaccinesreceived_vaccine = sub(".*VACCINE_", "", vaccinesreceived_vaccine) + ) + + coc_vacc_hist <- dplyr::mutate( + .data = coc_vacc_hist, + vaccinesreceived_status = sub(".*STATUS_", "", vaccinesreceived_status) + ) + + coc_vacc_hist <- dplyr::mutate_at( + .tbl = coc_vacc_hist, + dplyr::vars(vaccinesreceived_date), + as.Date + ) + + return(coc_vacc_hist) +} diff --git a/man/clean_contacts_of_contacts_vax_history.Rd b/man/clean_contacts_of_contacts_vax_history.Rd new file mode 100644 index 0000000..43cb262 --- /dev/null +++ b/man/clean_contacts_of_contacts_vax_history.Rd @@ -0,0 +1,39 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/clean_contacts_of_contacts_vax_history.R +\name{clean_contacts_of_contacts_vax_history} +\alias{clean_contacts_of_contacts_vax_history} +\title{Cleans vaccination data from contacts of contacts data} +\usage{ +clean_contacts_of_contacts_vax_history(contacts_of_contacts) +} +\arguments{ +\item{contacts_of_contacts}{A \code{tibble} with address information from contacts +of contacts data.} +} +\value{ +A \code{tibble} with cleaned and un-nested vaccination history data. +} +\description{ +Cleans and un-nests vaccination history, where vaccination is +complete, from contacts of contacts data. Contacts of contacts data is +returned from \code{\link[=get_contacts_of_contacts]{get_contacts_of_contacts()}}. +} +\examples{ +\dontrun{ +url <- "https://MyGoDataServer.com/" +username <- "myemail@email.com" +password <- "mypassword" +outbreak_id <- "3b5554d7-2c19-41d0-b9af-475ad25a382b" + +contacts_of_contacts <- get_contacts_of_contacts( + url = url, + username = username, + password = password, + outbreak_id = outbreak_id +) + +vax_history <- clean_contacts_of_contacts_vax_history( + contacts_of_contacts = contacts_of_contacts +) +} +} diff --git a/tests/testthat/test-clean_contacts_of_contacts_vax_history.R b/tests/testthat/test-clean_contacts_of_contacts_vax_history.R new file mode 100644 index 0000000..e537ec7 --- /dev/null +++ b/tests/testthat/test-clean_contacts_of_contacts_vax_history.R @@ -0,0 +1,29 @@ +test_that("clean_contacts_of_contacts_vax_history works as expected", { + skip("get_contacts_of_contacts requires API call") + + contacts_of_contacts <- get_contacts_of_contacts( + url = url, + username = username, + password = password, + outbreak_id = outbreak_id + ) + + res <- clean_contacts_of_contacts_vax_history( + contacts_of_contacts = contacts_of_contacts + ) + + expect_s3_class(res, "tbl_df") + expect_s3_class(res, "data.frame") + expect_identical(dim(res), c(1L, 5L)) + expect_true( + all(c( + "id", "visualid", "vaccinesreceived_vaccine", "vaccinesreceived_date", + "vaccinesreceived_status" + ) %in% colnames(res)) + ) + + expect_identical( + unname(sapply(res[1, ], class)), + c("character", "character", "character", "Date", "character") + ) +}) From 78d1f539fda7d4fa69c5a3398340832108403bf1 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Wed, 15 Mar 2023 16:04:03 +0000 Subject: [PATCH 160/203] added clean_contacts_of_contacts function, documentation and test (WIP as function currently errors) --- R/clean_contacts_of_contacts.R | 229 ++++++++++++++++++ man/clean_contacts_of_contacts.Rd | 69 ++++++ .../test-clean_contacts_of_contacts.R | 62 +++++ 3 files changed, 360 insertions(+) create mode 100644 R/clean_contacts_of_contacts.R create mode 100644 man/clean_contacts_of_contacts.Rd create mode 100644 tests/testthat/test-clean_contacts_of_contacts.R diff --git a/R/clean_contacts_of_contacts.R b/R/clean_contacts_of_contacts.R new file mode 100644 index 0000000..c9c6444 --- /dev/null +++ b/R/clean_contacts_of_contacts.R @@ -0,0 +1,229 @@ +#' Clean contacts of contacts data +#' +#' @description Cleans and un-nests contacts of contacts data. Contacts of +#' contacts data is returned by [`get_contacts_of_contacts()`]. +#' +#' @param contacts_of_contacts A `tibble` containing the contacts of contacts +#' data. +#' @param contacts_of_contacts_address_history_clean A `tibble` containing the +#' cleaned address history data from contacts of contacts (data is cleaned by +#' [`clean_contacts_of_contacts_address_history()`]). +#' @param contacts_of_contacts_vacc_history_clean A `tibble` containing the +#' cleaned vaccination history from contacts of contacts (data is cleaned by +#' [`clean_contacts_of_contacts_vax_history()`]). +#' +#' @return A `tibble` containing the cleaned contacts of contacts data. +#' @export +#' +#' @examples +#' \dontrun{ +#' url <- "https://MyGoDataServer.com/" +#' username <- "myemail@email.com" +#' password <- "mypassword" +#' outbreak_id <- "3b5554d7-2c19-41d0-b9af-475ad25a382b" +#' +#' contacts_of_contacts <- get_contacts_of_contacts( +#' url = url, +#' username = username, +#' password = password, +#' outbreak_id = outbreak_id +#' ) +#' +#' locations <- get_locations( +#' url = url, +#' username = username, +#' password = password +#' ) +#' +#' locations_clean <- clean_locations(locations = locations) +#' +#' contacts_of_contacts_address_history_clean <- clean_contacts_of_contacts_address_history( +#' contacts_of_contacts = contacts_of_contacts, +#' locations_clean = locations_clean +#' ) +#' +#' contacts_of_contacts_vacc_history_clean <- clean_contacts_of_contacts_vax_history( +#' contacts_of_contacts = contacts_of_contacts +#' ) +#' +#' contacts_of_contacts_clean <- clean_contacts_of_contacts( +#' contacts_of_contacts = contacts_of_contacts, +#' contacts_of_contacts_address_history_clean = contacts_of_contacts_address_history_clean, +#' contacts_of_contacts_vacc_history_clean = contacts_of_contacts_vacc_history_clean +#' ) +#' } +clean_contacts_of_contacts <- function(contacts_of_contacts, + contacts_of_contacts_address_history_clean, + contacts_of_contacts_vacc_history_clean) { + + # Remove all deleted records + coc_clean <- dplyr::filter( + .data = contacts_of_contacts, + .data$deleted == FALSE | is.na(.data$deleted) + ) + + # Remove all nested fields, otherwise problems with exporting to excel + coc_clean <- dplyr::select_if( + .tbl = coc_clean, + .predicate = purrr::negate(is.list) + ) + + # standardize column name syntax + coc_clean <- janitor::clean_names(dat = coc_clean) + + # label timestamps as datetime + coc_clean <- dplyr::rename( + .data = coc_clean, + date_of_birth = "dob", + datetime_updated_at = "updated_at", + datetime_created_at = "created_at" + ) + + # clean up all character fields + coc_clean <- dplyr::mutate( + .data = coc_clean, + dplyr::across(dplyr::where(is.character), na_if, "") + ) + + # clean date formats (TODO: edit this so that we can see time stamps) + coc_clean <- dplyr::mutate_at( + .tbl = coc_clean, + .vars = dplyr::vars(dplyr::starts_with("date_")), + list(~ as.Date(substr(., 1, 10))) + ) + + coc_clean <- dplyr::mutate( + .data = coc_clean, + datetime_updated_at = as.POSIXct(datetime_updated_at,format="%Y-%m-%dT%H:%M") + ) + + coc_clean <- dplyr::mutate( + .data = coc_clean, + datetime_created_at = as.POSIXct(datetime_created_at,format="%Y-%m-%dT%H:%M") + ) + + # truncate responses of categorical vars so easier to read + coc_clean <- dplyr::mutate( + .data = coc_clean, + classification = sub(".*CLASSIFICATION_", "", classification), + gender = sub(".*GENDER_", "", gender), + occupation = sub(".*OCCUPATION_", "", occupation), + outcome = sub(".*OUTCOME_", "", outcome_id), + pregnancy_status = sub(".*STATUS_", "", pregnancy_status), + risk_level = sub(".*LEVEL_", "", risk_level), + relationship_certainty_level = sub(".*LEVEL_", "", relationship_certainty_level_id), + relationship_exposure_type = sub(".*TYPE_", "", relationship_exposure_type_id), + relationship_context_of_transmission = sub(".*TRANSMISSION_", "", relationship_social_relationship_type_id), + relationship_exposure_frequency = sub(".*FREQUENCY_", "", relationship_exposure_frequency_id), + relationship_exposure_duration = sub(".*DURATION_", "", relationship_exposure_duration_id) + ) + + contacts_of_contacts_address_history_clean <- dplyr::filter( + .data = contacts_of_contacts_address_history_clean, + addresses_typeid == "USUAL_PLACE_OF_RESIDENCE" + ) + + # join in current address from address history, only current place of + # residence + coc_clean <- left_join( + x = coc_clean, + y = contacts_of_contacts_address_history_clean, + by="id" + ) + + # join in info from vacc block + coc_clean <- dplyr::mutate( + .data = coc_clean, + vaccinated = case_when(id %in% contacts_of_contacts_vacc_history_clean$id[contacts_of_contacts_vacc_history_clean$vaccinesreceived_status == "VACCINATED"] ~ TRUE, TRUE ~ FALSE) + ) + + # force NA ages to appear as NA, not as 0 like sometimes occurs + coc_clean <- dplyr::mutate(.data = coc_clean, age_years = as.numeric(age_years)) + coc_clean <- dplyr::mutate(.data = coc_clean, age_years = na_if(age_years,0)) + coc_clean <- dplyr::mutate(.data = coc_clean, age_months = as.numeric(age_months)) + coc_clean <- dplyr::mutate(.data = coc_clean, age_months = na_if(age_months,0)) + + # standardize age vars into just one var, round by 1 decimal + coc_clean <- dplyr::mutate( + .data = coc_clean, + age = case_when(!is.na(age_months) ~ round(age_months / 12, digits = 1), + TRUE ~ age_years) + ) + + # WHO age categories updated Sept 2020: + # 0-4, 5-9, 10-14, 15-19, 20-29, 30-39, 40-49, 50-59, 60-64, 65-69, 70-74, + # 75-79, 80+ + # these categories below match that of detailed WHO surveillance dash: + # <5, 5-14, 15-24, 25-64, 65+ + coc_clean <- dplyr::mutate( + .data = coc_clean, + age_class = factor( + case_when( + age <= 4 ~ "<5", + age <= 14 ~ "5-14", + age <= 24 ~ "15-24", + age <= 64 ~ "25-64", + is.finite(age) ~ "65+", + TRUE ~ "unknown" + ), levels = c( + "<5", + "5-14", + "15-24", + "25-64", + "65+", + "unknown" + )), + age_class = factor( + age_class, + levels = rev(levels(age_class))) + ) + + # organize order of vars, only bring in what we need, take away confusing vars + coc_clean <- dplyr::select( + .data = coc_clean, + "id", # identifier + "visual_id", # identifier + "classification", # identifier + "first_name", # demographics + "middle_name", # demographics + "last_name", # demographics + "gender", # demographics + "age", # demographics + "age_class", # demographics + "occupation", # demographics + "pregnancy_status", # demographics + "date_of_reporting", # dates + "date_of_last_contact", # dates + "date_of_burial", # dates + "was_case", # epi + "risk_level", # epi + "risk_reason", # epi + "safe_burial", # epi + "transfer_refused", # epi + "responsible_user_id", # assigned contact tracer + dplyr::matches("^admin_.*name$"), # address + "lat",# address + "long", # address + "address", # address + "postal_code", # address + "city", # address + "telephone", # address + "email", # address + "vaccinated", # vaccination + "outcome", # outcome + "date_of_outcome", # outcome + "relationship_exposure_type", + "relationship_context_of_transmission", + "relationship_exposure_duration", + "relationship_exposure_frequency", + "relationship_certainty_level", + "relationship_cluster_id", + location_id = "addresses_locationid", # uuid in case need later for joining of whatever sort. + "created_by", + "datetime_created_at", + "updated_by", + "datetime_updated_at" + ) + + return(coc_clean) +} diff --git a/man/clean_contacts_of_contacts.Rd b/man/clean_contacts_of_contacts.Rd new file mode 100644 index 0000000..2cada00 --- /dev/null +++ b/man/clean_contacts_of_contacts.Rd @@ -0,0 +1,69 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/clean_contacts_of_contacts.R +\name{clean_contacts_of_contacts} +\alias{clean_contacts_of_contacts} +\title{Clean contacts of contacts data} +\usage{ +clean_contacts_of_contacts( + contacts_of_contacts, + contacts_of_contacts_address_history_clean, + contacts_of_contacts_vacc_history_clean +) +} +\arguments{ +\item{contacts_of_contacts}{A \code{tibble} containing the contacts of contacts +data.} + +\item{contacts_of_contacts_address_history_clean}{A \code{tibble} containing the +cleaned address history data from contacts of contacts (data is cleaned by +\code{\link[=clean_contacts_of_contacts_address_history]{clean_contacts_of_contacts_address_history()}}).} + +\item{contacts_of_contacts_vacc_history_clean}{A \code{tibble} containing the +cleaned vaccination history from contacts of contacts (data is cleaned by +\code{\link[=clean_contacts_of_contacts_vax_history]{clean_contacts_of_contacts_vax_history()}}).} +} +\value{ +A \code{tibble} containing the cleaned contacts of contacts data. +} +\description{ +Cleans and un-nests contacts of contacts data. Contacts of +contacts data is returned by \code{\link[=get_contacts_of_contacts]{get_contacts_of_contacts()}}. +} +\examples{ +\dontrun{ + url <- "https://MyGoDataServer.com/" + username <- "myemail@email.com" + password <- "mypassword" + outbreak_id <- "3b5554d7-2c19-41d0-b9af-475ad25a382b" + +contacts_of_contacts <- get_contacts_of_contacts( + url = url, + username = username, + password = password, + outbreak_id = outbreak_id +) + +locations <- get_locations( + url = url, + username = username, + password = password +) + +locations_clean <- clean_locations(locations = locations) + +contacts_of_contacts_address_history_clean <- clean_contacts_of_contacts_address_history( + contacts_of_contacts = contacts_of_contacts, + locations_clean = locations_clean +) + +contacts_of_contacts_vacc_history_clean <- clean_contacts_of_contacts_vax_history( + contacts_of_contacts = contacts_of_contacts +) + +contacts_of_contacts_clean <- clean_contacts_of_contacts( + contacts_of_contacts = contacts_of_contacts, + contacts_of_contacts_address_history_clean = contacts_of_contacts_address_history_clean, + contacts_of_contacts_vacc_history_clean = contacts_of_contacts_vacc_history_clean +) +} +} diff --git a/tests/testthat/test-clean_contacts_of_contacts.R b/tests/testthat/test-clean_contacts_of_contacts.R new file mode 100644 index 0000000..740bfc9 --- /dev/null +++ b/tests/testthat/test-clean_contacts_of_contacts.R @@ -0,0 +1,62 @@ +test_that("clean_contacts_of_contacts works as expected", { + skip("get_contacts_of_contacts requires API call") + + contacts_of_contacts <- get_contacts_of_contacts( + url = url, + username = username, + password = password, + outbreak_id = outbreak_id + ) + + locations <- get_locations( + url = url, + username = username, + password = password + ) + locations_clean <- clean_locations(locations = locations) + + contacts_of_contacts_address_history_clean <- clean_contacts_of_contacts_address_history( + contacts_of_contacts = contacts_of_contacts, + locations_clean = locations_clean + ) + + contacts_of_contacts_vacc_history_clean <- clean_contacts_of_contacts_vax_history( + contacts_of_contacts = contacts_of_contacts + ) + + res <- clean_contacts_of_contacts( + contacts_of_contacts = contacts_of_contacts, + contacts_of_contacts_address_history_clean = contacts_of_contacts_address_history_clean, + contacts_of_contacts_vacc_history_clean = contacts_of_contacts_vacc_history_clean + ) + + expect_s3_class(res, "tbl_df") + expect_s3_class(res, "data.frame") + expect_identical(dim(res), c(14L, 43L)) + expect_true( + all(c( + "id", "visual_id", "classification", "first_name", "middle_name", + "last_name", "gender", "age", "age_class", "occupation", + "pregnancy_status", "date_of_reporting", "date_of_onset", + "date_of_infection", "date_become_case", "date_of_burial", "was_contact", + "risk_level", "risk_reason", "safe_burial", "transfer_refused", + "responsible_user_id", "admin_0_name", "admin_1_name", "admin_2_name", + "lat", "long", "address", "postal_code", "city", "telephone", "email", + "vaccinated", "isolated", "hospitalized", "icu", "outcome", + "date_of_outcome", "location_id", "created_by", "datetime_created_at", + "updated_by", "datetime_updated_at" + ) %in% colnames(res)) + ) + + expect_identical( + unname(sapply(res[1, ], function(x) class(x)[1])), + c( + rep("character", 7), "numeric", "factor", "character", "character", + rep("Date", 5), "logical", "character", "character", "logical", "logical", + "character", "character", "character", "character", "numeric", "numeric", + "character", "character", "character", "character", "character", + "logical", "logical", "logical", "logical", "character", "Date", + "character", "character", "POSIXct", "character", "POSIXct" + ) + ) +}) From 442c3a4c81cf4fd43d8e0773b0db28a7aa15d761 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Wed, 15 Mar 2023 16:53:20 +0000 Subject: [PATCH 161/203] added clean_followups function, documentation and test --- R/clean_followups.R | 153 ++++++++++++++++++++++++++ man/clean_followups.Rd | 63 +++++++++++ tests/testthat/test-clean_followups.R | 59 ++++++++++ 3 files changed, 275 insertions(+) create mode 100644 R/clean_followups.R create mode 100644 man/clean_followups.Rd create mode 100644 tests/testthat/test-clean_followups.R diff --git a/R/clean_followups.R b/R/clean_followups.R new file mode 100644 index 0000000..056d604 --- /dev/null +++ b/R/clean_followups.R @@ -0,0 +1,153 @@ +#' Clean followup data +#' +#' @description Cleans and un-nests followup data which is returned from +#' [`get_followups()`] +#' +#' @param followups A `tibble` with events data. Followup data is returned by +#' [`get_followups()`]. +#' @param contacts_address_history_clean A `tibble` with cleaned address +#' history data from contacts. Contacts data is returned by [`get_contacts()`] +#' and cleaned by [`clean_contact_address_history()`]. +#' +#' @return A `tibble` with cleaned followup data. +#' @export +#' +#' @examples +#' \dontrun{ +#' url <- "https://MyGoDataServer.com/" +#' username <- "myemail@email.com" +#' password <- "mypassword" +#' outbreak_id <- "3b5554d7-2c19-41d0-b9af-475ad25a382b" +#' +#' followups <- get_followups( +#' url = url, +#' username = username, +#' password = password, +#' outbreak_id = outbreak_id +#' ) +#' +#' contacts <- get_contacts( +#' url = url, +#' username = username, +#' password = password, +#' outbreak_id = outbreak_id +#' ) +#' +#' locations <- get_locations( +#' url = url, +#' username = username, +#' password = password +#' ) +#' +#' locations_clean <- clean_locations(locations = locations) +#' +#' contacts_address_history_clean <- clean_contact_address_history( +#' contacts = contacts, +#' locations_clean = locations_clean +#' ) +#' +#' followups_clean <- clean_followups( +#' followups = followups, +#' contacts_address_history_clean = contacts_address_history_clean +#' ) +#' } +clean_followups <- function(followups, + contacts_address_history_clean) { + + # Remove all deleted records + followups_clean <- dplyr::filter( + .data = followups, + .data$deleted == FALSE | is.na(.data$deleted) + ) + + # Remove all nested fields, otherwise problems with exporting to excel + followups_clean <- dplyr::select_if( + .tbl = followups_clean, + .predicate = purrr::negate(is.list) + ) + + # take out all that are not core variables, otherwise diff versions and + # problems exporting to excel + followups_clean <- dplyr::select( + .data = followups_clean, + -dplyr::contains("questionnaireAnswers") + ) + + # standardize column name syntax + followups_clean <- janitor::clean_names(dat = followups_clean) + + # label timestamps as datetime + followups_clean <- dplyr::rename( + .data = followups_clean, + datetime_updated_at = "updated_at", + datetime_created_at = "created_at" + ) + + # clean up all character fields + followups_clean <- dplyr::mutate( + .data = followups_clean, + dplyr::across(dplyr::where(is.character), dplyr::na_if, "") + ) + + # clean date formats (TODO: edit this so that we can see time stamps) + followups_clean <- dplyr::mutate_at( + .tbl = followups_clean, + dplyr::vars(date), list(~ as.Date(substr(., 1, 10))) + ) + + followups_clean <- dplyr::mutate( + .data = followups_clean, + datetime_updated_at = as.POSIXct(datetime_updated_at, format = "%Y-%m-%dT%H:%M") + ) + + followups_clean <- dplyr::mutate( + .data = followups_clean, + datetime_created_at = as.POSIXct(datetime_created_at, format = "%Y-%m-%dT%H:%M") + ) + + # truncate responses of categorical vars so easier to read + followups_clean <- dplyr::mutate( + .data = followups_clean, + followup_status = sub(".*TYPE_", "", status_id) + ) + + contacts_address_history_clean <- dplyr::filter( + .data = contacts_address_history_clean, + addresses_typeid == "USUAL_PLACE_OF_RESIDENCE" + ) + + followups_clean <- dplyr::left_join( + x = followups_clean, + y = contacts_address_history_clean, + by = "id" + ) + + # organize order of vars, only bring in what we need, take away confusing vars + followups_clean <- dplyr::select( + .data = followups_clean, + "id", # identifier + "contact_id", # identifier + "contact_visual_id", # identifier + "date", # dates + followup_number = "index", # FU status + "followup_status", # FU status + "targeted", # FU status + "responsible_user_id", # assigned contact tracer + "team_id", # assigned contact tracer + dplyr::matches("^admin_.*name$"), # address + "lat", # address + "long", # address + "address", # address + "postal_code", # address + "city", # address + "telephone", # address + "email", # address + location_id = "addresses_locationid", # uuid in case need later for joining of whatever sort. + "created_by", # record modification + "datetime_created_at", # record modification + "updated_by", # record modification + "datetime_updated_at" # record modification + ) + + return(followups_clean) +} diff --git a/man/clean_followups.Rd b/man/clean_followups.Rd new file mode 100644 index 0000000..074c21d --- /dev/null +++ b/man/clean_followups.Rd @@ -0,0 +1,63 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/clean_followups.R +\name{clean_followups} +\alias{clean_followups} +\title{Clean followup data} +\usage{ +clean_followups(followups, contacts_address_history_clean) +} +\arguments{ +\item{followups}{A \code{tibble} with events data. Followup data is returned by +\code{\link[=get_followups]{get_followups()}}.} + +\item{contacts_address_history_clean}{A \code{tibble} with cleaned address +history data from contacts. Contacts data is returned by \code{\link[=get_contacts]{get_contacts()}} +and cleaned by \code{\link[=clean_contact_address_history]{clean_contact_address_history()}}.} +} +\value{ +A \code{tibble} with cleaned followup data. +} +\description{ +Cleans and un-nests followup data which is returned from +\code{\link[=get_followups]{get_followups()}} +} +\examples{ +\dontrun{ +url <- "https://MyGoDataServer.com/" +username <- "myemail@email.com" +password <- "mypassword" +outbreak_id <- "3b5554d7-2c19-41d0-b9af-475ad25a382b" + +followups <- get_followups( + url = url, + username = username, + password = password, + outbreak_id = outbreak_id +) + +contacts <- get_contacts( + url = url, + username = username, + password = password, + outbreak_id = outbreak_id +) + +locations <- get_locations( + url = url, + username = username, + password = password +) + +locations_clean <- clean_locations(locations = locations) + +contacts_address_history_clean <- clean_contact_address_history( + contacts = contacts, + locations_clean = locations_clean +) + +followups_clean <- clean_followups( + followups = followups, + contacts_address_history_clean = contacts_address_history_clean +) +} +} diff --git a/tests/testthat/test-clean_followups.R b/tests/testthat/test-clean_followups.R new file mode 100644 index 0000000..630f7d4 --- /dev/null +++ b/tests/testthat/test-clean_followups.R @@ -0,0 +1,59 @@ +test_that("clean_followups works as expected", { + skip("get_followups requires API call") + + followups <- get_followups( + url = url, + username = username, + password = password, + outbreak_id = outbreak_id + ) + + contacts <- get_contacts( + url = url, + username = username, + password = password, + outbreak_id = outbreak_id + ) + + locations <- get_locations( + url = url, + username = username, + password = password + ) + + locations_clean <- clean_locations(locations = locations) + + contacts_address_history_clean <- clean_contact_address_history( + contacts = contacts, + locations_clean = locations_clean + ) + + res <- clean_followups( + followups = followups, + contacts_address_history_clean = contacts_address_history_clean + ) + + expect_s3_class(res, "tbl_df") + expect_s3_class(res, "data.frame") + expect_identical(dim(res), c(49L, 24L)) + expect_true( + all(c( + "id", "contact_id", "contact_visual_id", "date", "followup_number", + "followup_status", "targeted", "responsible_user_id", "team_id", + "admin_0_name", "admin_1_name", "admin_2_name", "lat", "long", "address", + "postal_code", "city", "telephone", "email", "location_id", "created_by", + "datetime_created_at", "updated_by", "datetime_updated_at" + ) %in% colnames(res)) + ) + + expect_identical( + unname(sapply(res[1, ], function(x) class(x)[1])), + c( + "character", "character", "character", "Date", "integer", "character", + "logical", "character", "character", "character", "character", + "character", "numeric", "numeric", "character", "logical", "character", + "character", "character", "character", "character", "POSIXct", + "character", "POSIXct" + ) + ) +}) From 688d28cfef38048563dce19a3066fa2fc4700795 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Wed, 15 Mar 2023 17:32:51 +0000 Subject: [PATCH 162/203] added clean_events function, documentation and test (WIP function currently errors) --- R/clean_events.R | 116 +++++++++++++++++++++++++++++ man/clean_events.Rd | 50 +++++++++++++ tests/testthat/test-clean_events.R | 28 +++++++ 3 files changed, 194 insertions(+) create mode 100644 R/clean_events.R create mode 100644 man/clean_events.Rd create mode 100644 tests/testthat/test-clean_events.R diff --git a/R/clean_events.R b/R/clean_events.R new file mode 100644 index 0000000..7bd02fa --- /dev/null +++ b/R/clean_events.R @@ -0,0 +1,116 @@ +#' Clean events data +#' +#' @description Cleans and un-nests events data which is returned from +#' [`get_events()`]. +#' +#' @param events A `tibble` with events data. Events data is returned by +#' [`get_events()`]. +#' @param locations_clean A `tibble` with cleaned location data. Location data +#' is returned by [`get_locations()`] and cleaned by [`clean_locations()`]. +#' Make sure the locations data is cleaned prior to supplying it to +#' `clean_events()`. +#' +#' @return A `tibble` with cleaned events data. +#' @export +#' +#' @examples +#' \dontrun{ +#' url <- "https://MyGoDataServer.com/" +#' username <- "myemail@email.com" +#' password <- "mypassword" +#' outbreak_id <- "3b5554d7-2c19-41d0-b9af-475ad25a382b" +#' +#' events <- get_events( +#' url = url, +#' username = username, +#' password = password, +#' outbreak_id = outbreak_id +#' ) +#' +#' locations <- get_locations( +#' url = url, +#' username = username, +#' password = password +#' ) +#' locations_clean <- clean_locations(locations = locations) +#' +#' clean_events <- clean_events( +#' events = events, +#' locations_clean = locations_clean) +#' } +clean_events <- function(events, + locations_clean) { + + # Remove all deleted records + clean_events <- dplyr::filter( + .data = events, + .data$deleted == FALSE | is.na(.data$deleted) + ) + + # Remove all nested fields, otherwise problems with exporting to excel + clean_events <- dplyr::select_if(.tbl = clean_events, purrr::negate(is.list)) + + # standardize column name syntax + clean_events <- janitor::clean_names(clean_events) + + # label timestamps as datetime + clean_events <- dplyr::rename( + .data = clean_events, + datetime_updated_at = "updated_at", + datetime_created_at = "created_at" + ) + + # clean up all character fields + clean_events <- dplyr::mutate( + .data = clean_events, + dplyr::across(dplyr::where(is.character), na_if, "") + ) + + # clean date formats (TODO: edit this so that we can see time stamps) + clean_events <- mutate_at( + clean_events, + dplyr::vars(dplyr::starts_with("date_")), + list(~ as.Date(substr(., 1, 10))) + ) + clean_events <- mutate( + clean_events, + datetime_updated_at = as.POSIXct(datetime_updated_at, format="%Y-%m-%dT%H:%M")) + clean_events <- mutate( + clean_events, + datetime_created_at = as.POSIXct(datetime_created_at,format="%Y-%m-%dT%H:%M")) + + clean_events <- dplyr::left_join( + x = clean_events, + y = select(locations_clean, + location_id, + matches("^admin_.*name$")), + by = c("address_location_id" = "location_id") + ) + + # organize order of vars, only bring in what we need, take away + # confusing vars + clean_events <- dplyr::select( + .data = clean_events, + "id", # identifier + "name", # identifier + "date", # dates + "date_of_reporting", # dates + "description", + "responsible_user_id", # assigned contact tracer + matches("^admin_.*name$"), + lat = "address_geo_location_lat", # address + long = "address_geo_location_lng", # address + address = "address_address_line1", # address + postal_code = "address_postal_code", # address + city = "address_city", # address + telephone = "address_phone_number", # address + email = "address_email_address", # address + location_id = "address_location_id", # uuid in case need later for joining of whatever sort. + "created_by", + "datetime_created_at", + "updated_by", + "datetime_updated_at" + ) # record modification + + return(clean_events) +} diff --git a/man/clean_events.Rd b/man/clean_events.Rd new file mode 100644 index 0000000..2e444d3 --- /dev/null +++ b/man/clean_events.Rd @@ -0,0 +1,50 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/clean_events.R +\name{clean_events} +\alias{clean_events} +\title{Clean events data} +\usage{ +clean_events(events, locations_clean) +} +\arguments{ +\item{events}{A \code{tibble} with events data. Events data is returned by +\code{\link[=get_events]{get_events()}}.} + +\item{locations_clean}{A \code{tibble} with cleaned location data. Location data +is returned by \code{\link[=get_locations]{get_locations()}} and cleaned by \code{\link[=clean_locations]{clean_locations()}}. +Make sure the locations data is cleaned prior to supplying it to +\code{clean_events()}.} +} +\value{ +A \code{tibble} with cleaned events data. +} +\description{ +Cleans and un-nests events data which is returned from +\code{\link[=get_events]{get_events()}}. +} +\examples{ +\dontrun{ +url <- "https://MyGoDataServer.com/" +username <- "myemail@email.com" +password <- "mypassword" +outbreak_id <- "3b5554d7-2c19-41d0-b9af-475ad25a382b" + +events <- get_events( + url = url, + username = username, + password = password, + outbreak_id = outbreak_id +) + +locations <- get_locations( + url = url, + username = username, + password = password +) +locations_clean <- clean_locations(locations = locations) + +clean_events <- clean_events( + events = events, + locations_clean = locations_clean) +} +} diff --git a/tests/testthat/test-clean_events.R b/tests/testthat/test-clean_events.R new file mode 100644 index 0000000..90cd59c --- /dev/null +++ b/tests/testthat/test-clean_events.R @@ -0,0 +1,28 @@ +test_that("clean_events works as expected", { + skip("get_events requires API call") + + res <- get_events( + url = url, + username = username, + password = password, + outbreak_id = outbreak_id + ) + res <- clean_events(events = res) + + expect_s3_class(res, "tbl_df") + expect_s3_class(res, "data.frame") + expect_identical(dim(res), c(3L, 15L)) + expect_true( + all(c( + "id", "name", "user_ids_1", "user_ids_2", "user_ids_3", "user_ids_4", + "user_ids_5", "location_ids_1", "location_ids_2", "location_ids_3", + "location_ids_4", "created_by", "datetime_created_at", "updated_by", + "datetime_updated_at" + ) %in% colnames(res)) + ) + + expect_identical( + unname(sapply(res[1, ], class)), + rep("character", 15) + ) +}) From 05380ac88b54ceaeb3b91c4705e4cf74ba0c777c Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Thu, 16 Mar 2023 14:09:29 +0000 Subject: [PATCH 163/203] added clean_users function, documentation and test --- R/clean_users.R | 69 +++++++++++++++++++++++++++++++ man/clean_users.Rd | 34 +++++++++++++++ tests/testthat/test-clean_users.R | 23 +++++++++++ 3 files changed, 126 insertions(+) create mode 100644 R/clean_users.R create mode 100644 man/clean_users.Rd create mode 100644 tests/testthat/test-clean_users.R diff --git a/R/clean_users.R b/R/clean_users.R new file mode 100644 index 0000000..3653110 --- /dev/null +++ b/R/clean_users.R @@ -0,0 +1,69 @@ +#' Cleans users data +#' +#' @description Cleans and un-nests users data. Users data is returned by +#' [`get_users()`]. +#' +#' @param users A `tibble` containing users data. Users data is returned by +#' [`get_users()`]. +#' +#' @return A `tibble` with cleaned users data. +#' @export +#' +#' @examples +#' \dontrun{ +#' url <- "https://MyGoDataServer.com/" +#' username <- "myemail@email.com" +#' password <- "mypassword" +#' +#' users <- get_users( +#' url = url, +#' username = username, +#' password = password +#' ) +#' +#' clean_users <- clean_users(users) +#' } +clean_users <- function(users) { + + # standardize column name syntax + clean_users <- janitor::clean_names(clean_users) + + # label timestamps as datetime + clean_users <- dplyr::rename( + clean_users, + datetime_last_login = "last_login_date", + datetime_created_at = "created_at" + ) + + # clean up all character fields + clean_users <- dplyr::mutate( + .data = clean_users, + dplyr::across(dplyr::where(is.character), dplyr::na_if, "") + ) + + clean_users <- tidyr::unnest_wider(clean_users, "role_ids", names_sep = "_") + + # truncate responses of categorical vars so easier to read + clean_users <- dplyr::mutate( + clean_users, + institution_name = sub(".*NAME_", "", .data$institution_name) + ) + + # organize order of vars, only bring in what we need, take away confusing vars + clean_users <- dplyr::select( + .data = clean_users, + "id", + "first_name", + "last_name", + "email", + "institution_name", + "disregard_geographic_restrictions", + dplyr::starts_with("role_ids"), + "active_outbreak_id", + "created_by", + "datetime_created_at", + "datetime_last_login" + ) + + return(clean_users) +} diff --git a/man/clean_users.Rd b/man/clean_users.Rd new file mode 100644 index 0000000..d06731b --- /dev/null +++ b/man/clean_users.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/clean_users.R +\name{clean_users} +\alias{clean_users} +\title{Cleans users data} +\usage{ +clean_users(users) +} +\arguments{ +\item{users}{A \code{tibble} containing users data. Users data is returned by +\code{\link[=get_users]{get_users()}}.} +} +\value{ +A \code{tibble} with cleaned users data. +} +\description{ +Cleans and un-nests users data. Users data is returned by +\code{\link[=get_users]{get_users()}}. +} +\examples{ +\dontrun{ +url <- "https://MyGoDataServer.com/" +username <- "myemail@email.com" +password <- "mypassword" + +users <- get_users( + url = url, + username = username, + password = password +) + +clean_users <- clean_users(users) +} +} diff --git a/tests/testthat/test-clean_users.R b/tests/testthat/test-clean_users.R new file mode 100644 index 0000000..876963a --- /dev/null +++ b/tests/testthat/test-clean_users.R @@ -0,0 +1,23 @@ +test_that("clean_users works as expected", { + skip("get_users requires API call") + + users <- get_users(url = url, username = username, password = password) + res <- clean_users(users = users) + + expect_s3_class(res, "tbl_df") + expect_s3_class(res, "data.frame") + expect_identical(dim(res), c(20L, 24L)) + expect_true( + all(c( + "id", "first_name", "last_name", "email", "institution_name", + "disregard_geographic_restrictions", paste0("role_ids_", 1:14), + "active_outbreak_id", "created_by", "datetime_created_at", + "datetime_last_login" + ) %in% colnames(res)) + ) + + expect_identical( + unname(sapply(res[1, ], class)), + c(rep("character", 5), "logical", rep("character", 18)) + ) +}) From 0de513ac86856479848fac16dba3b110fe9f7ad5 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Thu, 16 Mar 2023 14:11:03 +0000 Subject: [PATCH 164/203] fixed bug in clean_users --- R/clean_users.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/clean_users.R b/R/clean_users.R index 3653110..cca13d3 100644 --- a/R/clean_users.R +++ b/R/clean_users.R @@ -26,7 +26,7 @@ clean_users <- function(users) { # standardize column name syntax - clean_users <- janitor::clean_names(clean_users) + clean_users <- janitor::clean_names(users) # label timestamps as datetime clean_users <- dplyr::rename( From fc1180d8f84982579a1265ddedb897c3fe771d5e Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Thu, 16 Mar 2023 14:16:18 +0000 Subject: [PATCH 165/203] added clean_teams function, documentation and test --- R/clean_teams.R | 75 +++++++++++++++++++++++++++++++ man/clean_teams.Rd | 34 ++++++++++++++ tests/testthat/test-clean_teams.R | 23 ++++++++++ 3 files changed, 132 insertions(+) create mode 100644 R/clean_teams.R create mode 100644 man/clean_teams.Rd create mode 100644 tests/testthat/test-clean_teams.R diff --git a/R/clean_teams.R b/R/clean_teams.R new file mode 100644 index 0000000..6c670d0 --- /dev/null +++ b/R/clean_teams.R @@ -0,0 +1,75 @@ +#' Clean teams data +#' +#' @description Cleans and un-nests teams data. Teams data is returned by +#' [`get_teams()`]. +#' +#' @param teams A `tibble` containing teams data. Teams data is returned by +#' [`get_teams()`]. +#' +#' @return A `tibble` of cleaned teams data +#' @export +#' +#' @examples +#' \dontrun{ +#' url <- "https://MyGoDataServer.com/" +#' username <- "myemail@email.com" +#' password <- "mypassword" +#' +#' teams <- get_teams( +#' url = url, +#' username = username, +#' password = password +#' ) +#' +#' clean_teams <- clean_teams(teams) +#' } +clean_teams <- function(teams) { + + # Remove all deleted records + clean_teams <- dplyr::filter( + .data = teams, + .data$deleted == FALSE | is.na(.data$deleted) + ) + + # standardize column name syntax + clean_teams <- janitor::clean_names(clean_teams) + + # label timestamps as datetime + clean_teams <- dplyr::rename( + .data = clean_teams, + datetime_updated_at = "updated_at", + datetime_created_at = "created_at" + ) + + #clean up all character fields + clean_teams <- dplyr::mutate( + .data = clean_teams, + dplyr::across(dplyr::where(is.character), na_if, "") + ) + + clean_teams <- tidyr::unnest_wider( + data = clean_teams, + col = "user_ids", + names_sep = "_" + ) + clean_teams <- tidyr::unnest_wider( + data = clean_teams, + col = "location_ids", + names_sep = "_" + ) + + # organize order of vars, only bring in what we need, take away confusing vars + clean_teams <- dplyr::select( + .data = clean_teams, + "id", + "name", + dplyr::starts_with("user_ids"), + dplyr::starts_with("location_ids"), + "created_by", # record modification + "datetime_created_at", # record modification + "updated_by", # record modification + "datetime_updated_at" # record modification + ) + + return(clean_teams) +} diff --git a/man/clean_teams.Rd b/man/clean_teams.Rd new file mode 100644 index 0000000..27dfa2e --- /dev/null +++ b/man/clean_teams.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/clean_teams.R +\name{clean_teams} +\alias{clean_teams} +\title{Clean teams data} +\usage{ +clean_teams(teams) +} +\arguments{ +\item{teams}{A \code{tibble} containing teams data. Teams data is returned by +\code{\link[=get_teams]{get_teams()}}.} +} +\value{ +A \code{tibble} of cleaned teams data +} +\description{ +Cleans and un-nests teams data. Teams data is returned by +\code{\link[=get_teams]{get_teams()}}. +} +\examples{ +\dontrun{ +url <- "https://MyGoDataServer.com/" +username <- "myemail@email.com" +password <- "mypassword" + +teams <- get_teams( + url = url, + username = username, + password = password +) + +clean_teams <- clean_teams(teams) +} +} diff --git a/tests/testthat/test-clean_teams.R b/tests/testthat/test-clean_teams.R new file mode 100644 index 0000000..ff8a10f --- /dev/null +++ b/tests/testthat/test-clean_teams.R @@ -0,0 +1,23 @@ +test_that("clean_teams works as expected", { + skip("get_teams requires API call") + + teams <- get_teams(url = url, username = username, password = password) + res <- clean_teams(teams = teams) + + expect_s3_class(res, "tbl_df") + expect_s3_class(res, "data.frame") + expect_identical(dim(res), c(4L, 15L)) + expect_true( + all(c( + "id", "name", "user_ids_1", "user_ids_2", "user_ids_3", "user_ids_4", + "user_ids_5", "location_ids_1", "location_ids_2", "location_ids_3", + "location_ids_4", "created_by", "datetime_created_at", "updated_by", + "datetime_updated_at" + ) %in% colnames(res)) + ) + + expect_identical( + unname(sapply(res[1, ], class)), + rep("character", 15) + ) +}) From 035e7109d4100279668c798067ee5c7287265cd0 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Thu, 16 Mar 2023 14:32:47 +0000 Subject: [PATCH 166/203] added clean_relationships function, documentation and test --- R/clean_relationships.R | 104 ++++++++++++++++++++++ man/clean_relationships.Rd | 36 ++++++++ tests/testthat/test-clean_relationships.R | 30 +++++++ 3 files changed, 170 insertions(+) create mode 100644 R/clean_relationships.R create mode 100644 man/clean_relationships.Rd create mode 100644 tests/testthat/test-clean_relationships.R diff --git a/R/clean_relationships.R b/R/clean_relationships.R new file mode 100644 index 0000000..c0cc883 --- /dev/null +++ b/R/clean_relationships.R @@ -0,0 +1,104 @@ +#' Cleans relationship data +#' +#' @description Cleans and un-nests relationship data. Relationship data is +#' returned by [`get_relationships()`]. +#' +#' @param relationships A `tibble` of relationship data. Relationship data is +#' returned by [`get_relationships()`]. +#' +#' @return A `tibble` with clean relationship data. +#' @export +#' +#' @examples +#' \dontrun{ +#' url <- "https://MyGoDataServer.com/" +#' username <- "myemail@email.com" +#' password <- "mypassword" +#' outbreak_id <- "3b5554d7-2c19-41d0-b9af-475ad25a382b" +#' +#' relationships <- get_relationships( +#' url = url, +#' username = username, +#' password = password, +#' outbreak_id = outbreak_id +#' ) +#' +#' clean_relationships <- clean_relationships(relationships) +#' } +clean_relationships <- function(relationships) { + + # Remove all deleted records + clean_relationships <- dplyr::filter( + .data = relationships, + .data$deleted == FALSE | is.na(.data$deleted) + ) + + # Remove all nested fields, otherwise problems with exporting to excel + clean_relationships <- dplyr::select_if( + .tbl = clean_relationships, + purrr::negate(is.list) + ) + + # standardize column name syntax + clean_relationships <- janitor::clean_names(clean_relationships) + + # label timestamps as datetime + clean_relationships <- dplyr::rename( + .data = clean_relationships, + datetime_updated_at = "updated_at", + datetime_created_at = "created_at" + ) + + #clean up all character fields + clean_relationships <- dplyr::mutate( + .data = clean_relationships, + dplyr::across(dplyr::where(is.character), na_if, "") + ) + + # clean date formats (TODO: edit this so that we can see time stamps) + clean_relationships <- dplyr::mutate( + .data = clean_relationships, + dplyr::across( + dplyr::starts_with("date_"), list(~ as.Date(substr(., 1, 10))) + ) + ) + clean_relationships <- dplyr::mutate( + .data = clean_relationships, + datetime_updated_at = as.POSIXct( + datetime_updated_at, + format = "%Y-%m-%dT%H:%M" + ) + ) + clean_relationships <- dplyr::mutate( + .data = clean_relationships, + datetime_created_at = as.POSIXct( + datetime_created_at, + format = "%Y-%m-%dT%H:%M" + ) + ) + + # truncate responses of categorical vars so easier to read + clean_relationships <- dplyr::mutate( + .data = clean_relationships, + source_person_type = sub(".*TYPE_", "", source_person_type), + target_person_type = sub(".*TYPE_", "", target_person_type) + ) + + # organize order of vars, only bring in what we need, take away confusing vars + clean_relationships <- dplyr::select( + .data = clean_relationships, + "id", #id + "source_person_id", #id + "source_person_visual_id", #id + "target_person_id", #id + "target_person_visual_id", #id + "source_person_type", #id + "target_person_type", #id + "created_by", # record modification + "datetime_created_at", # record modification + "updated_by", # record modification + "datetime_updated_at" # record modification + ) + + return(clean_relationships) +} diff --git a/man/clean_relationships.Rd b/man/clean_relationships.Rd new file mode 100644 index 0000000..e834893 --- /dev/null +++ b/man/clean_relationships.Rd @@ -0,0 +1,36 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/clean_relationships.R +\name{clean_relationships} +\alias{clean_relationships} +\title{Cleans relationship data} +\usage{ +clean_relationships(relationships) +} +\arguments{ +\item{relationships}{A \code{tibble} of relationship data. Relationship data is +returned by \code{\link[=get_relationships]{get_relationships()}}.} +} +\value{ +A \code{tibble} with clean relationship data. +} +\description{ +Cleans and un-nests relationship data. Relationship data is +returned by \code{\link[=get_relationships]{get_relationships()}}. +} +\examples{ +\dontrun{ +url <- "https://MyGoDataServer.com/" +username <- "myemail@email.com" +password <- "mypassword" +outbreak_id <- "3b5554d7-2c19-41d0-b9af-475ad25a382b" + +relationships <- get_relationships( + url = url, + username = username, + password = password, + outbreak_id = outbreak_id +) + +clean_relationships <- clean_relationships(relationships) +} +} diff --git a/tests/testthat/test-clean_relationships.R b/tests/testthat/test-clean_relationships.R new file mode 100644 index 0000000..6fff74c --- /dev/null +++ b/tests/testthat/test-clean_relationships.R @@ -0,0 +1,30 @@ +test_that("clean_relationships works as expected", { + skip("get_relationships requires API call") + + relationships <- get_relationships( + url = url, + username = username, + password = password, + outbreak_id = outbreak_id + ) + res <- clean_relationships(relationships = relationships) + + expect_s3_class(res, "tbl_df") + expect_s3_class(res, "data.frame") + expect_identical(dim(res), c(28L, 11L)) + expect_true( + all(c( + "id", "source_person_id", "source_person_visual_id", "target_person_id", + "target_person_visual_id", "source_person_type", "target_person_type", + "created_by", "datetime_created_at", "updated_by", "datetime_updated_at" + ) %in% colnames(res)) + ) + + expect_identical( + unname(sapply(res[1, ], function(x) class(x)[1])), + c( + "character", "character", "character", "character", "character", + "character", "character", "character", "POSIXct", "character", "POSIXct" + ) + ) +}) From 54ec892dc831f7ea416c3d91d5171495f10bfab1 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Thu, 16 Mar 2023 14:46:34 +0000 Subject: [PATCH 167/203] added contacts_per_case function, documentation and test --- R/contacts_per_case.R | 49 +++++++++++++++++++++++++ man/contacts_per_case.Rd | 41 +++++++++++++++++++++ tests/testthat/test-contacts_per_case.R | 28 ++++++++++++++ 3 files changed, 118 insertions(+) create mode 100644 R/contacts_per_case.R create mode 100644 man/contacts_per_case.Rd create mode 100644 tests/testthat/test-contacts_per_case.R diff --git a/R/contacts_per_case.R b/R/contacts_per_case.R new file mode 100644 index 0000000..cd32caa --- /dev/null +++ b/R/contacts_per_case.R @@ -0,0 +1,49 @@ +#' Counts the number of contacts per case from relationship data +#' +#' @description Uses cleaned relationship data to tally the number of contacts +#' per case. Relationship data is returned by [`get_relationships()`] and +#' cleaned by [`clean_relationships()`]. +#' +#' @param relationships_clean A `tibble` with the cleaned relationship data. +#' Relationship data is returned by [`get_relationships()`] and cleaned by +#' [`clean_relationships()`]. +#' +#' @return A `tibble` with the number of contacts associated to each source +#' person +#' @export +#' +#' @examples +#' \dontrun{ +#' url <- "https://MyGoDataServer.com/" +#' username <- "myemail@email.com" +#' password <- "mypassword" +#' outbreak_id <- "3b5554d7-2c19-41d0-b9af-475ad25a382b" +#' +#' relationships <- get_relationships( +#' url = url, +#' username = username, +#' password = password, +#' outbreak_id = outbreak_id +#' ) +#' +#' clean_relationships <- clean_relationships(relationships) +#' +#' contacts_per_case <- contacts_per_case(clean_relationships) +#' } +contacts_per_case <- function(relationships_clean) { + + contacts_per_case <- dplyr::group_by( + .data = relationships_clean, + .data$source_person_id + ) + + contacts_per_case <- dplyr::tally(x = contacts_per_case) + + contacts_per_case <- dplyr::select( + .data = contacts_per_case, + "source_person_id", + no_contacts = n + ) + + return(contacts_per_case) +} diff --git a/man/contacts_per_case.Rd b/man/contacts_per_case.Rd new file mode 100644 index 0000000..b3f7046 --- /dev/null +++ b/man/contacts_per_case.Rd @@ -0,0 +1,41 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/contacts_per_case.R +\name{contacts_per_case} +\alias{contacts_per_case} +\title{Counts the number of contacts per case from relationship data} +\usage{ +contacts_per_case(relationships_clean) +} +\arguments{ +\item{relationships_clean}{A \code{tibble} with the cleaned relationship data. +Relationship data is returned by \code{\link[=get_relationships]{get_relationships()}} and cleaned by +\code{\link[=clean_relationships]{clean_relationships()}}.} +} +\value{ +A \code{tibble} with the number of contacts associated to each source +person +} +\description{ +Uses cleaned relationship data to tally the number of contacts +per case. Relationship data is returned by \code{\link[=get_relationships]{get_relationships()}} and +cleaned by \code{\link[=clean_relationships]{clean_relationships()}}. +} +\examples{ +\dontrun{ +url <- "https://MyGoDataServer.com/" +username <- "myemail@email.com" +password <- "mypassword" +outbreak_id <- "3b5554d7-2c19-41d0-b9af-475ad25a382b" + +relationships <- get_relationships( + url = url, + username = username, + password = password, + outbreak_id = outbreak_id +) + +clean_relationships <- clean_relationships(relationships) + +contacts_per_case <- contacts_per_case(clean_relationships) +} +} diff --git a/tests/testthat/test-contacts_per_case.R b/tests/testthat/test-contacts_per_case.R new file mode 100644 index 0000000..e6fbd0a --- /dev/null +++ b/tests/testthat/test-contacts_per_case.R @@ -0,0 +1,28 @@ +test_that("contacts_per_case works as expected", { + skip("get_relationships requires API call") + + relationships <- get_relationships( + url = url, + username = username, + password = password, + outbreak_id = outbreak_id + ) + + clean_relationships <- clean_relationships(relationships) + + res <- contacts_per_case(clean_relationships) + + expect_s3_class(res, "tbl_df") + expect_s3_class(res, "data.frame") + expect_identical(dim(res), c(10L, 2L)) + expect_true( + all(c( + "source_person_id", "no_contacts" + ) %in% colnames(res)) + ) + + expect_identical( + unname(sapply(res[1, ], class)), + c("character", "integer") + ) +}) From e3ee64088a0763fe550670e2211903ba6925e610 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Thu, 16 Mar 2023 14:54:16 +0000 Subject: [PATCH 168/203] added exposures_per_case function, documentation and test --- R/exposures_per_case.R | 49 ++++++++++++++++++++++++ man/exposures_per_case.Rd | 41 ++++++++++++++++++++ tests/testthat/test-exposures_per_case.R | 28 ++++++++++++++ 3 files changed, 118 insertions(+) create mode 100644 R/exposures_per_case.R create mode 100644 man/exposures_per_case.Rd create mode 100644 tests/testthat/test-exposures_per_case.R diff --git a/R/exposures_per_case.R b/R/exposures_per_case.R new file mode 100644 index 0000000..a2ef7f2 --- /dev/null +++ b/R/exposures_per_case.R @@ -0,0 +1,49 @@ +#' Counts the number of exposures per case from relationship data +#' +#' @description Uses cleaned relationship data to tally the number of contacts +#' per case. Relationship data is returned by [`get_relationships()`] and +#' cleaned by [`clean_relationships()`]. +#' +#' @param relationships_clean A `tibble` with the cleaned relationship data. +#' Relationship data is returned by [`get_relationships()`] and cleaned by +#' [`clean_relationships()`]. +#' +#' @return A `tibble` with the number of exposures associated to each target +#' person +#' @export +#' +#' @examples +#' \dontrun{ +#' url <- "https://MyGoDataServer.com/" +#' username <- "myemail@email.com" +#' password <- "mypassword" +#' outbreak_id <- "3b5554d7-2c19-41d0-b9af-475ad25a382b" +#' +#' relationships <- get_relationships( +#' url = url, +#' username = username, +#' password = password, +#' outbreak_id = outbreak_id +#' ) +#' +#' clean_relationships <- clean_relationships(relationships) +#' +#' exposures_per_case <- exposures_per_case(clean_relationships) +#' } +exposures_per_case <- function(relationships_clean) { + + exposures_per_case <- dplyr::group_by( + .data = relationships_clean, + .data$target_person_id + ) + + exposures_per_case <- dplyr::tally(x = exposures_per_case) + + exposures_per_case <- dplyr::select( + .data = exposures_per_case, + "target_person_id", + no_exposures = n + ) + + return(exposures_per_case) +} diff --git a/man/exposures_per_case.Rd b/man/exposures_per_case.Rd new file mode 100644 index 0000000..51942e5 --- /dev/null +++ b/man/exposures_per_case.Rd @@ -0,0 +1,41 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/exposures_per_case.R +\name{exposures_per_case} +\alias{exposures_per_case} +\title{Counts the number of exposures per case from relationship data} +\usage{ +exposures_per_case(relationships_clean) +} +\arguments{ +\item{relationships_clean}{A \code{tibble} with the cleaned relationship data. +Relationship data is returned by \code{\link[=get_relationships]{get_relationships()}} and cleaned by +\code{\link[=clean_relationships]{clean_relationships()}}.} +} +\value{ +A \code{tibble} with the number of exposures associated to each target +person +} +\description{ +Uses cleaned relationship data to tally the number of contacts +per case. Relationship data is returned by \code{\link[=get_relationships]{get_relationships()}} and +cleaned by \code{\link[=clean_relationships]{clean_relationships()}}. +} +\examples{ +\dontrun{ +url <- "https://MyGoDataServer.com/" +username <- "myemail@email.com" +password <- "mypassword" +outbreak_id <- "3b5554d7-2c19-41d0-b9af-475ad25a382b" + +relationships <- get_relationships( + url = url, + username = username, + password = password, + outbreak_id = outbreak_id +) + +clean_relationships <- clean_relationships(relationships) + +exposures_per_case <- exposures_per_case(clean_relationships) +} +} diff --git a/tests/testthat/test-exposures_per_case.R b/tests/testthat/test-exposures_per_case.R new file mode 100644 index 0000000..e47ab2b --- /dev/null +++ b/tests/testthat/test-exposures_per_case.R @@ -0,0 +1,28 @@ +test_that("exposures_per_case works as expected", { + skip("get_relationships requires API call") + + relationships <- get_relationships( + url = url, + username = username, + password = password, + outbreak_id = outbreak_id + ) + + clean_relationships <- clean_relationships(relationships) + + res <- exposures_per_case(clean_relationships) + + expect_s3_class(res, "tbl_df") + expect_s3_class(res, "data.frame") + expect_identical(dim(res), c(23L, 2L)) + expect_true( + all(c( + "target_person_id", "no_exposures" + ) %in% colnames(res)) + ) + + expect_identical( + unname(sapply(res[1, ], class)), + c("character", "integer") + ) +}) From bab80d634c6a5a15155719d451dfe620a3c1a6b8 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Fri, 31 Mar 2023 12:04:55 +0100 Subject: [PATCH 169/203] added clean_contacts --- R/clean_contacts.R | 278 ++++++++++++++++++++++++++++++++++++++++++ man/clean_contacts.Rd | 90 ++++++++++++++ 2 files changed, 368 insertions(+) create mode 100644 R/clean_contacts.R create mode 100644 man/clean_contacts.Rd diff --git a/R/clean_contacts.R b/R/clean_contacts.R new file mode 100644 index 0000000..d2907d8 --- /dev/null +++ b/R/clean_contacts.R @@ -0,0 +1,278 @@ +#' Clean contacts data +#' +#' @description Cleans and un-nests contact data. Contact data is returned by +#' [`get_contacts()`]. +#' +#' @param contacts A `tibble` containing the contact data. +#' @param contacts_address_history_clean A `tibble` containing the cleaned +#' address history data from contacts (data is cleaned by +#' [`clean_contact_address_history()`]. +#' @param contacts_vacc_history_clean A `tibble` containing the cleaned +#' vaccination history data from contacts (data is cleaned by +#' [`clean_contact_vax_history()`]. +#' @param contacts_becoming_cases A `tibble` containing the cleaned data on +#' contacts that became cases (date is produced using +#' [`cases_from_contacts()`]). +#' +#' @return A `tibble` containing the cleaned case data. +#' @export +#' +#' @examples +#' \dontrun{ +#' url <- "https://MyGoDataServer.com/" +#' username <- "myemail@email.com" +#' password <- "mypassword" +#' outbreak_id <- "3b5554d7-2c19-41d0-b9af-475ad25a382b" +#' +#' contacts <- get_contacts( +#' url = url, +#' username = username, +#' password = password, +#' outbreak_id = outbreak_id +#' ) +#' +#' locations <- get_locations( +#' url = url, +#' username = username, +#' password = password +#' ) +#' +#' locations_clean <- clean_locations(locations = locations) +#' +#' # other cleaned data required for `clean_contacts()` +#' contacts_vacc_history_clean <- clean_contact_vax_history(contacts = contacts) +#' contacts_address_history_clean <- clean_contact_address_history( +#' contacts = contacts, +#' locations_clean = locations_clean +#' ) +#' +#' cases <- get_cases( +#' url = url, +#' username = username, +#' password = password, +#' outbreak_id = outbreak_id +#' ) +#' cases_address_history_clean <- clean_case_address_history(cases = cases) +#' cases_vacc_history_clean <- clean_case_vax_history(cases = cases) +#' cases_dateranges_history_clean <- clean_case_med_history(cases = cases) +#' +#' cases_clean <- clean_cases( +#' cases = cases, +#' cases_address_history_clean = cases_address_history_clean, +#' cases_vacc_history_clean = cases_vacc_history_clean, +#' cases_dateranges_history_clean = cases_dateranges_history_clean +#' ) +#' contacts_becoming_cases <- cases_from_contacts(cases_clean = cases_clean) +#' +#' contacts_clean <- clean_contacts( +#' contacts = contacts, +#' contacts_address_history_clean = cases_address_history_clean, +#' contacts_vacc_history_clean = cases_vacc_history_clean, +#' contacts_becoming_cases = contacts_becoming_cases +#' ) +#' } +clean_contacts <- function(contacts, + contacts_address_history_clean, + contacts_vacc_history_clean, + contacts_becoming_cases) { + + # Remove all deleted records + contacts_clean <- dplyr::filter( + .data = contacts, + .data$deleted == FALSE | is.na(.data$deleted) + ) + + # Remove all nested fields, otherwise problems with exporting to excel + contacts_clean <- dplyr::select_if( + .tbl = contacts_clean, + .predicate = purrr::negate(is.list) + ) + + # take out all that are not core variables, otherwise diff versions and + # problems exporting to excel + contacts_clean <- dplyr::select( + .data = contacts_clean, + -dplyr::contains("questionnaireAnswers") + ) + + # standardize column name syntax + contacts_clean <- janitor::clean_names(dat = contacts_clean) + + # label timestamps as datetime + contacts_clean <- dplyr::rename( + .data = contacts_clean, + date_of_birth = "dob", + date_of_follow_up_start = "follow_up_start_date", + date_of_follow_up_end = "follow_up_end_date", + datetime_updated_at = "updated_at", + datetime_created_at = "created_at" + ) + + # take out other unnecessary vars that are unnecessary and may confuse + # (i.e. was_case for cases) + contacts_clean <- dplyr::select( + .data = contacts_clean, + -c( + "is_date_of_reporting_approximate", + "was_contact", + "follow_up_original_start_date", + "type", + "deleted", + "created_on" + ) + ) + + #clean up all character fields + contacts_clean <- dplyr::mutate( + .data = contacts_clean, + dplyr::across(dplyr::where(is.character), na_if, "") + ) + + # clean date formats (TODO: edit this so that we can see time stamps) + contacts_clean <- dplyr::mutate_at( + .tbl = contacts_clean, + .vars = dplyr::vars(dplyr::starts_with("date_")), + list(~ as.Date(substr(., 1, 10))) + ) + + contacts_clean <- dplyr::mutate( + .data = contacts_clean, + datetime_updated_at = as.POSIXct(datetime_updated_at, format = "%Y-%m-%dT%H:%M") + ) + + contacts_clean <- dplyr::mutate( + .data = contacts_clean, + datetime_created_at = as.POSIXct(datetime_created_at, format = "%Y-%m-%dT%H:%M") + ) + + # truncate responses of categorical vars so easier to read + contacts_clean <- dplyr::mutate( + .data = contacts_clean, + classification = sub(".*CLASSIFICATION_", "", classification), + gender = sub(".*GENDER_", "", gender), + occupation = sub(".*OCCUPATION_", "", occupation), + outcome = sub(".*OUTCOME_", "", outcome_id), + pregnancy_status = sub(".*STATUS_", "", pregnancy_status), + risk_level = sub(".*LEVEL_", "", risk_level), + follow_up_status = sub(".*TYPE_", "", follow_up_status), + relationship_certainty_level = sub(".*LEVEL_", "", relationship_certainty_level_id), + relationship_exposure_type = sub(".*TYPE_", "", relationship_exposure_type_id), + relationship_context_of_transmission = sub(".*TRANSMISSION_", "", relationship_social_relationship_type_id), + relationship_exposure_frequency = sub(".*FREQUENCY_", "", relationship_exposure_frequency_id), + relationship_exposure_duration = sub(".*DURATION_", "", relationship_exposure_duration_id) + ) + + contacts_address_history_clean <- dplyr::filter( + .data = contacts_address_history_clean, + addresses_typeid == "USUAL_PLACE_OF_RESIDENCE" + ) + + # join in current address from address history, only current place of residence + contacts_clean <- dplyr::left_join( + x = contacts_clean, + y = contacts_address_history_clean, + by = "id" + ) + + # join in info from vacc block + contacts_clean <- dplyr::mutate( + .data = contacts_clean, + vaccinated = case_when(id %in% contacts_vacc_history_clean$id[contacts_vacc_history_clean$vaccinesreceived_status == "VACCINATED"] ~ TRUE, TRUE ~ FALSE) + ) + + # force NA ages to appear as NA, not as 0 like sometimes occurs + contacts_clean <- dplyr::mutate(.data = contacts_clean, age_years = as.numeric(age_years)) + contacts_clean <- dplyr::mutate(.data = contacts_clean, age_years = na_if(age_years,0)) + contacts_clean <- dplyr::mutate(.data = contacts_clean, age_months = as.numeric(age_months)) + contacts_clean <- dplyr::mutate(.data = contacts_clean, age_months = na_if(age_months,0)) + + # standardize age vars into just one var, round by 1 decimal + contacts_clean <- dplyr::mutate( + .data = contacts_clean, + age = case_when(!is.na(age_months) ~ round(age_months / 12, digits = 1), + TRUE ~ age_years)) + + # WHO age categories updated Sept 2020: + # 0-4, 5-9, 10-14, 15-19, 20-29, 30-39, 40-49, 50-59, 60-64, 65-69, 70-74, + # 75-79, 80+ + # these categories below match that of detailed WHO surveillance dash: + # <5, 5-14, 15-24, 25-64, 65+ + contacts_clean <- dplyr::mutate( + .data = contacts_clean, + age_class = factor( + case_when( + age <= 4 ~ "<5", + age <= 14 ~ "5-14", + age <= 24 ~ "15-24", + age <= 64 ~ "25-64", + is.finite(age) ~ "65+", + TRUE ~ "unknown" + ), levels = c( + "<5", + "5-14", + "15-24", + "25-64", + "65+", + "unknown" + )), + age_class = factor( + age_class, + levels = rev(levels(age_class))) + ) + + # organize order of vars, only bring in what we need, take away confusing vars + contacts_clean <- dplyr::select( + .data = contacts_clean, + id, # identifier + visual_id, # identifier + classification, # identifier + follow_up_status, # identifier + first_name, # demographics + middle_name, # demographics + last_name, # demographics + gender, # demographics + age, # demographics + age_class, # demographics + occupation, # demographics + pregnancy_status, # demographics + date_of_reporting, # dates + date_of_last_contact, # dates + date_of_burial, # dates + date_of_follow_up_start, # dates + date_of_follow_up_end, # dates + was_case, # epi + risk_level, # epi + risk_reason, # epi + safe_burial, # epi + transfer_refused, # epi + responsible_user_id, # assigned contact tracer + follow_up_team_id, # assigned contact tracer + matches("^admin_.*name$"), + lat, # address + long, # address + address, # address + postal_code, # address + city, # address + telephone, # address + email, # address + vaccinated, # vaccination + outcome, # outcome + date_of_outcome, # outcome + relationship_exposure_type, + relationship_context_of_transmission, + relationship_exposure_duration, + relationship_exposure_frequency, + relationship_certainty_level, + relationship_cluster_id, + location_id = addresses_locationid, # uuid in case need later for joining of whatever sort. + created_by, # record modification + datetime_created_at, # record modification + updated_by, # record modification + datetime_updated_at # record modification + ) + + #Join in cases that used to be contacts + contacts_clean <- dplyr::bind_rows(contacts_clean, contacts_becoming_cases) + + return(contacts_clean) +} diff --git a/man/clean_contacts.Rd b/man/clean_contacts.Rd new file mode 100644 index 0000000..02de61c --- /dev/null +++ b/man/clean_contacts.Rd @@ -0,0 +1,90 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/clean_contacts.R +\name{clean_contacts} +\alias{clean_contacts} +\title{Clean contacts data} +\usage{ +clean_contacts( + contacts, + contacts_address_history_clean, + contacts_vacc_history_clean, + contacts_becoming_cases +) +} +\arguments{ +\item{contacts}{A \code{tibble} containing the contact data.} + +\item{contacts_address_history_clean}{A \code{tibble} containing the cleaned +address history data from contacts (data is cleaned by +\code{\link[=clean_contact_address_history]{clean_contact_address_history()}}.} + +\item{contacts_vacc_history_clean}{A \code{tibble} containing the cleaned +vaccination history data from contacts (data is cleaned by +\code{\link[=clean_contact_vax_history]{clean_contact_vax_history()}}.} + +\item{contacts_becoming_cases}{A \code{tibble} containing the cleaned data on +contacts that became cases (date is produced using +\code{\link[=cases_from_contacts]{cases_from_contacts()}}).} +} +\value{ +A \code{tibble} containing the cleaned case data. +} +\description{ +Cleans and un-nests contact data. Contact data is returned by +\code{\link[=get_contacts]{get_contacts()}}. +} +\examples{ +\dontrun{ + url <- "https://MyGoDataServer.com/" + username <- "myemail@email.com" + password <- "mypassword" + outbreak_id <- "3b5554d7-2c19-41d0-b9af-475ad25a382b" + + contacts <- get_contacts( + url = url, + username = username, + password = password, + outbreak_id = outbreak_id + ) + + locations <- get_locations( + url = url, + username = username, + password = password + ) + + locations_clean <- clean_locations(locations = locations) + + # other cleaned data required for `clean_contacts()` + contacts_vacc_history_clean <- clean_contact_vax_history(contacts = contacts) + contacts_address_history_clean <- clean_contact_address_history( + contacts = contacts, + locations_clean = locations_clean + ) + + cases <- get_cases( + url = url, + username = username, + password = password, + outbreak_id = outbreak_id + ) + cases_address_history_clean <- clean_case_address_history(cases = cases) + cases_vacc_history_clean <- clean_case_vax_history(cases = cases) + cases_dateranges_history_clean <- clean_case_med_history(cases = cases) + + cases_clean <- clean_cases( + cases = cases, + cases_address_history_clean = cases_address_history_clean, + cases_vacc_history_clean = cases_vacc_history_clean, + cases_dateranges_history_clean = cases_dateranges_history_clean + ) + contacts_becoming_cases <- cases_from_contacts(cases_clean = cases_clean) + + contacts_clean <- clean_contacts( + contacts = contacts, + contacts_address_history_clean = cases_address_history_clean, + contacts_vacc_history_clean = cases_vacc_history_clean, + contacts_becoming_cases = contacts_becoming_cases + ) +} +} From 6c9e113a0574986cc110b3e36c13dfb2dbcab784 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Fri, 31 Mar 2023 12:22:29 +0100 Subject: [PATCH 170/203] added dplyr namespace to case_when in clean_cases --- R/clean_cases.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/clean_cases.R b/R/clean_cases.R index e99748c..1f00853 100644 --- a/R/clean_cases.R +++ b/R/clean_cases.R @@ -113,17 +113,17 @@ clean_cases <- function(cases, cases_clean <- dplyr::mutate( .data = cases_clean, - isolated = case_when(id %in% cases_dateranges_history_clean$id[cases_dateranges_history_clean$dateranges_typeid == "ISOLATION"] ~ TRUE, TRUE ~ FALSE) + isolated = dplyr::case_when(id %in% cases_dateranges_history_clean$id[cases_dateranges_history_clean$dateranges_typeid == "ISOLATION"] ~ TRUE, TRUE ~ FALSE) ) cases_clean <- dplyr::mutate( .data = cases_clean, - hospitalized = case_when(id %in% cases_dateranges_history_clean$id[cases_dateranges_history_clean$dateranges_typeid == "HOSPITALIZATION"] ~ TRUE, TRUE ~ FALSE) + hospitalized = dplyr::case_when(id %in% cases_dateranges_history_clean$id[cases_dateranges_history_clean$dateranges_typeid == "HOSPITALIZATION"] ~ TRUE, TRUE ~ FALSE) ) cases_clean <- dplyr::mutate( .data = cases_clean, - icu = case_when(id %in% cases_dateranges_history_clean$id[cases_dateranges_history_clean$dateranges_typeid == "ICU_ADMISSION"] ~ TRUE, TRUE ~ FALSE) + icu = dplyr::case_when(id %in% cases_dateranges_history_clean$id[cases_dateranges_history_clean$dateranges_typeid == "ICU_ADMISSION"] ~ TRUE, TRUE ~ FALSE) ) cases_address_history_clean <- dplyr::filter( @@ -172,7 +172,7 @@ clean_cases <- function(cases, cases_clean <- dplyr::mutate( .data = cases_clean, age_class = factor( - case_when( + dplyr::case_when( age <= 4 ~ "<5", age <= 14 ~ "5-14", age <= 24 ~ "15-24", From 2696fd56ca0ecfead7117216b8f04f0a8fb434c6 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Fri, 31 Mar 2023 12:39:15 +0100 Subject: [PATCH 171/203] fixed clean_case_address_history by adding locations_clean argument to function, updated doc and test --- R/clean_case_address_history.R | 18 ++++++++++++++++-- man/clean_case_address_history.Rd | 18 ++++++++++++++++-- .../testthat/test-clean_case_address_history.R | 15 +++++++++++++-- 3 files changed, 45 insertions(+), 6 deletions(-) diff --git a/R/clean_case_address_history.R b/R/clean_case_address_history.R index 815d3fd..fb7ac72 100644 --- a/R/clean_case_address_history.R +++ b/R/clean_case_address_history.R @@ -6,6 +6,8 @@ #' #' @param cases A tibble with case data. Case data is returned by #' [`get_cases()`]. +#' @param locations_clean A tibble with cleaned locations data. Locations data +#' is returned by [`get_locations()`] and cleaned by [`clean_locations()`]. #' #' @return A tibble with address information from cases data. #' @export @@ -24,9 +26,21 @@ #' outbreak_id = outbreak_id #' ) #' -#' case_address_history <- clean_case_address_history(cases = cases) +#' locations <- get_locations( +#' url = url, +#' username = username, +#' password = password +#' ) +#' +#' locations_clean <- clean_locations(locations = locations) +#' +#' case_address_history <- clean_case_address_history( +#' cases = cases, +#' locations_clean = locations_clean +#' ) #' } -clean_case_address_history <- function(cases) { +clean_case_address_history <- function(cases, + locations_clean) { cases_address_history_clean <- dplyr::filter( .data = cases, diff --git a/man/clean_case_address_history.Rd b/man/clean_case_address_history.Rd index cd9369f..5c32b1b 100644 --- a/man/clean_case_address_history.Rd +++ b/man/clean_case_address_history.Rd @@ -4,11 +4,14 @@ \alias{clean_case_address_history} \title{Extract address information from case data} \usage{ -clean_case_address_history(cases) +clean_case_address_history(cases, locations_clean) } \arguments{ \item{cases}{A tibble with case data. Case data is returned by \code{\link[=get_cases]{get_cases()}}.} + +\item{locations_clean}{A tibble with cleaned locations data. Locations data +is returned by \code{\link[=get_locations]{get_locations()}} and cleaned by \code{\link[=clean_locations]{clean_locations()}}.} } \value{ A tibble with address information from cases data. @@ -32,6 +35,17 @@ cases <- get_cases( outbreak_id = outbreak_id ) -case_address_history <- clean_case_address_history(cases = cases) +locations <- get_locations( + url = url, + username = username, + password = password +) + +locations_clean <- clean_locations(locations = locations) + +case_address_history <- clean_case_address_history( + cases = cases, + locations_clean = locations_clean +) } } diff --git a/tests/testthat/test-clean_case_address_history.R b/tests/testthat/test-clean_case_address_history.R index a0c5467..f60c2e1 100644 --- a/tests/testthat/test-clean_case_address_history.R +++ b/tests/testthat/test-clean_case_address_history.R @@ -7,11 +7,22 @@ test_that("clean_case_address_history works as expected", { password = password, outbreak_id = outbreak_id ) - res <- clean_case_address_history(cases = res) + + locations <- get_locations( + url = url, + username = username, + password = password + ) + locations_clean <- clean_locations(locations = locations) + + res <- clean_case_address_history( + cases = res, + locations_clean = locations_clean + ) expect_s3_class(res, "tbl_df") expect_s3_class(res, "data.frame") - expect_identical(dim(res), c(15L, 14L)) + expect_identical(dim(res), c(16L, 14L)) expect_true( all(c( "id", "visualid", "addresses_locationid", "addresses_typeid", "lat", From a0026634b7fa4a2454314f4248b05039d7a0bf14 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Fri, 31 Mar 2023 12:42:04 +0100 Subject: [PATCH 172/203] fixed clean_locations by fixing typo in return --- R/clean_locations.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/clean_locations.R b/R/clean_locations.R index b008684..1c5afba 100644 --- a/R/clean_locations.R +++ b/R/clean_locations.R @@ -121,5 +121,5 @@ clean_locations <- function(locations) { by = "location_id" ) - return(locations_clean) + return(clean_locations) } From c28d3456591cf3900d554c4dc8291d707c6df47f Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Fri, 31 Mar 2023 12:48:31 +0100 Subject: [PATCH 173/203] updated check_godata_url function, doc and test to use success_code and avoid returning NA --- R/check_godata_url.R | 11 ++++++----- man/check_godata_url.Rd | 5 ++++- tests/testthat/test-check_godata_url.R | 9 +++++++++ 3 files changed, 19 insertions(+), 6 deletions(-) diff --git a/R/check_godata_url.R b/R/check_godata_url.R index f395640..1196961 100644 --- a/R/check_godata_url.R +++ b/R/check_godata_url.R @@ -6,6 +6,8 @@ #' #' @param url Insert the base URL for your instance of Go.Data here. Don't #' forget the forward slash "/" at end! +#' @param success_code A numeric specifying which code is returned by the API +#' when successfully returning the status code. Default is 200. #' #' @return #' Boolean, where `TRUE` indicates a valid URL. @@ -15,16 +17,15 @@ #' check_godata_url(url = url) #' } #' @export -check_godata_url <- function(url) { +check_godata_url <- function(url, + success_code = 200) { # Get status code for version check status_code <- httr::GET(paste0(url, "api/system-settings/version")) status_code <- purrr::pluck(status_code, "status_code") - # create boolean based on status code being 200 (success) - check <- status_code == 200 - - return(check) + # return boolean based on status code being a success + return(isTRUE(status_code == success_code)) } diff --git a/man/check_godata_url.Rd b/man/check_godata_url.Rd index f64d9ce..ad76c37 100644 --- a/man/check_godata_url.Rd +++ b/man/check_godata_url.Rd @@ -4,11 +4,14 @@ \alias{check_godata_url} \title{Check if the provided Go.Data URL is valid} \usage{ -check_godata_url(url) +check_godata_url(url, success_code = 200) } \arguments{ \item{url}{Insert the base URL for your instance of Go.Data here. Don't forget the forward slash "/" at end!} + +\item{success_code}{A numeric specifying which code is returned by the API +when successfully returning the status code. Default is 200.} } \value{ Boolean, where \code{TRUE} indicates a valid URL. diff --git a/tests/testthat/test-check_godata_url.R b/tests/testthat/test-check_godata_url.R index 80177f8..0022bf4 100644 --- a/tests/testthat/test-check_godata_url.R +++ b/tests/testthat/test-check_godata_url.R @@ -6,3 +6,12 @@ test_that("check_godata_url works as expected", { expect_type(res, "logical") expect_length(res, 1) }) + +test_that("check_godata_url works as expected specifying success code", { + skip("check_godata_url requires API call") + + res <- check_godata_url(url = url, success_code = 200) + + expect_type(res, "logical") + expect_length(res, 1) +}) From bb9c39d1e2d894249a53479d46aa43a8cba2bc3d Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Fri, 31 Mar 2023 12:59:25 +0100 Subject: [PATCH 174/203] updated check_godata_version to use strsplit, added version names and check --- R/check_godata_version.R | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/R/check_godata_version.R b/R/check_godata_version.R index 8cdd754..d59f964 100644 --- a/R/check_godata_version.R +++ b/R/check_godata_version.R @@ -20,18 +20,25 @@ check_godata_version <- function(url = url) { gd_version <- get_godata_version(url = url) # Convert string to vector of 3 numbers - gd_version <- stringr::str_split(gd_version, "[.]") + gd_version <- strsplit(x = gd_version, split = "[.]") gd_version <- as.numeric(unlist(gd_version)) + stopifnot( + "godata version from API does not have major, minor and patch versioning" = + length(gd_version) == 3 + ) + + names(gd_version) <- c("major", "minor", "patch") + # Check if 2.38.1 or later # Should be TRUE if it is version 2.38.1 or later & # FALSE if version 2.38.0 or earlier - if (gd_version[1] < 2) { + if (gd_version["major"] < 2) { return(FALSE) - } else if (gd_version[1] == 2 && gd_version[2] < 38) { + } else if (gd_version["major"] == 2 && gd_version["minor"] < 38) { return(FALSE) - } else if (gd_version[1] == 2 && gd_version[2] == 38 && gd_version[3] == 0) { + } else if (gd_version["major"] == 2 && gd_version["minor"] == 38 && gd_version["patch"] == 0) { return(FALSE) } else { return(TRUE) From c57fbf9a8f184d93297445fdf2998bee2024e069 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Fri, 31 Mar 2023 13:06:22 +0100 Subject: [PATCH 175/203] updated batch_downloader test --- tests/testthat/test-batch_downloader.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-batch_downloader.R b/tests/testthat/test-batch_downloader.R index 0d5168e..52daf07 100644 --- a/tests/testthat/test-batch_downloader.R +++ b/tests/testthat/test-batch_downloader.R @@ -15,7 +15,7 @@ test_that("batch_downloader works as expected", { expect_s3_class(res, "tbl_df") expect_s3_class(res, "data.frame") - expect_identical(dim(res), c(13L, 153L)) + expect_identical(dim(res), c(14L, 153L)) expect_identical( colnames(res), c("firstName", "gender", "wasContact", "safeBurial", "classification", From 6c9fe8c7248a882868cb0d9cdcbaaf32bdb1e9a1 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Fri, 31 Mar 2023 14:11:59 +0100 Subject: [PATCH 176/203] fixed clean_cases test --- tests/testthat/test-clean_cases.R | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/tests/testthat/test-clean_cases.R b/tests/testthat/test-clean_cases.R index a29ce82..a98f56b 100644 --- a/tests/testthat/test-clean_cases.R +++ b/tests/testthat/test-clean_cases.R @@ -8,8 +8,19 @@ test_that("clean_cases works as expected", { outbreak_id = outbreak_id ) + locations <- get_locations( + url = url, + username = username, + password = password + ) + + locations_clean <- clean_locations(locations = locations) + cases_vacc_history_clean <- clean_case_vax_history(cases = cases) - cases_address_history_clean <- clean_case_address_history(cases = cases) + cases_address_history_clean <- clean_case_address_history( + cases = cases, + locations_clean = locations_clean + ) cases_dateranges_history_clean <- clean_case_med_history(cases = cases) res <- clean_cases( From 542109403a693c73d09b53a0a54391c406f3766e Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Fri, 31 Mar 2023 14:14:27 +0100 Subject: [PATCH 177/203] updated clean_case_address_history --- tests/testthat/test-clean_case_address_history.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test-clean_case_address_history.R b/tests/testthat/test-clean_case_address_history.R index f60c2e1..f1ed978 100644 --- a/tests/testthat/test-clean_case_address_history.R +++ b/tests/testthat/test-clean_case_address_history.R @@ -1,7 +1,7 @@ test_that("clean_case_address_history works as expected", { skip("get_cases requires API call") - res <- get_cases( + cases <- get_cases( url = url, username = username, password = password, @@ -16,7 +16,7 @@ test_that("clean_case_address_history works as expected", { locations_clean <- clean_locations(locations = locations) res <- clean_case_address_history( - cases = res, + cases = cases, locations_clean = locations_clean ) From 649f5fb6c09f9bece6e87ebe9c84eddcbb7343f3 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Fri, 31 Mar 2023 14:16:35 +0100 Subject: [PATCH 178/203] fixed cases_from_contacts test --- tests/testthat/test-cases_from_contacts.R | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/tests/testthat/test-cases_from_contacts.R b/tests/testthat/test-cases_from_contacts.R index f5c3570..cd09e3e 100644 --- a/tests/testthat/test-cases_from_contacts.R +++ b/tests/testthat/test-cases_from_contacts.R @@ -8,8 +8,19 @@ test_that("cases_from_contacts works as expected", { outbreak_id = outbreak_id ) + locations <- get_locations( + url = url, + username = username, + password = password + ) + + locations_clean <- clean_locations(locations = locations) + cases_vacc_history_clean <- clean_case_vax_history(cases = cases) - cases_address_history_clean <- clean_case_address_history(cases = cases) + cases_address_history_clean <- clean_case_address_history( + cases = cases, + locations_clean = locations_clean + ) cases_dateranges_history_clean <- clean_case_med_history(cases = cases) cases_clean <- clean_cases( From fe8b34ed6e998d4cb341772c4afcc64b5e34d3f4 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Fri, 31 Mar 2023 14:44:47 +0100 Subject: [PATCH 179/203] fixed export_downloader test --- tests/testthat/test-export_downloader.R | 50 ++++++++++++++----------- 1 file changed, 28 insertions(+), 22 deletions(-) diff --git a/tests/testthat/test-export_downloader.R b/tests/testthat/test-export_downloader.R index 424ecbb..48ad43c 100644 --- a/tests/testthat/test-export_downloader.R +++ b/tests/testthat/test-export_downloader.R @@ -15,7 +15,7 @@ test_that("export_downloader works as expected", { expect_s3_class(res, "tbl_df") expect_s3_class(res, "data.frame") - expect_identical(dim(res), c(13L, 357L)) + expect_identical(dim(res), c(14L, 363L)) expect_true( all(c( "id", "visualId", "dateOfReporting", "isDateOfReportingApproximate", @@ -26,16 +26,20 @@ test_that("export_downloader works as expected", { "outcomeId", "dateOfOutcome", "documents", "type", "dateRanges", "transferRefused", "addresses", "safeBurial", "dateOfBurial", "isDateOfOnsetApproximate", "numberOfExposures", "numberOfContacts", - "burialLocationId", "burialLocationId Identifiers", + "deathLocationId", "deathLocationId Identifiers", + "deathLocationId Location geographical level", + "deathLocationId Parent location", "burialLocationId", + "burialLocationId Identifiers", "burialLocationId Location geographical level", "burialLocationId Parent location", "burialPlaceName", "investigationStatus", "dateInvestigationCompleted", "vaccinesReceived", - "pregnancyStatus", "responsibleUserId", "age.years", "age.months" + "pregnancyStatus", "age.years", "age.months", "responsibleUser.firstName", + "responsibleUser.lastName", "responsibleUser.id" ) %in% colnames(res)) ) expect_true( - all(grepl(pattern = "^questionnaireAnswers", x = colnames(res)[50:357])) + all(grepl(pattern = "^questionnaireAnswers", x = colnames(res)[56:363])) ) expect_identical( @@ -47,14 +51,15 @@ test_that("export_downloader works as expected", { "character", "character", "character", "character", "character", "list", "character", "list", "logical", "list", "logical", "logical", "logical", "integer", "integer", "logical", "list", "list", "list", "logical", - "character", "logical", "list", "character", "character", "integer", - "integer", "list", "list", "list", "list", "list", "list", "list", "list", + "list", "list", "list", "logical", "character", "logical", "list", + "character", "integer", "integer", "character", "character", "character", "list", "list", "list", "list", "list", "list", "list", "list", "list", "list", "list", "list", "list", "list", "list", "list", "list", "list", "list", "list", "list", "list", "list", "list", "list", "list", "list", + "list", "list", "list", "list", "list", "list", "list", "list", "logical", "logical", "logical", "logical", "logical", "logical", "logical", - "logical", "logical", "logical", "logical", "list", "list", "list", - "list", "list", "list", "logical", "logical", "logical", "logical", + "logical", "logical", "logical", "list", "list", "list", "list", "list", + "list", "logical", "logical", "logical", "logical", "logical", "logical", "logical", "logical", "logical", "logical", "logical", "logical", "logical", "logical", "logical", "logical", "logical", "logical", "logical", "logical", "logical", "logical", "logical", "logical", @@ -65,24 +70,23 @@ test_that("export_downloader works as expected", { "logical", "logical", "logical", "logical", "logical", "logical", "logical", "logical", "logical", "logical", "logical", "logical", "logical", "logical", "logical", "logical", "logical", "logical", - "logical", "logical", "logical", "logical", "list", "list", "list", - "list", "list", "list", "list", "logical", "logical", "logical", - "logical", "logical", "logical", "logical", "list", "list", "list", - "list", "list", "list", "list", "list", "list", "list", "logical", - "list", "list", "list", "list", "logical", "logical", "logical", + "logical", "logical", "list", "list", "list", "list", "list", "list", + "list", "logical", "logical", "logical", "logical", "logical", "logical", + "logical", "list", "list", "list", "list", "list", "list", "list", "list", + "list", "list", "logical", "list", "list", "list", "list", "logical", "logical", "logical", "logical", "logical", "logical", "logical", "logical", "logical", "logical", "logical", "logical", "logical", "logical", "logical", "logical", "logical", "logical", "logical", "logical", "logical", "logical", "logical", "logical", "logical", - "logical", "list", "logical", "logical", "list", "logical", "logical", - "logical", "list", "logical", "list", "logical", "list", "logical", - "list", "logical", "list", "logical", "list", "logical", "list", "list", - "list", "list", "list", "list", "list", "list", "list", "list", "list", - "list", "list", "list", "list", "logical", "list", "logical", "list", - "logical", "logical", "logical", "logical", "list", "logical", "logical", - "logical", "logical", "list", "logical", "list", "logical", "list", + "logical", "logical", "logical", "list", "logical", "logical", "list", + "logical", "logical", "logical", "list", "logical", "list", "logical", "list", "logical", "list", "logical", "list", "logical", "list", - "logical", "list", "logical", "logical", "logical", "logical", "logical", + "logical", "list", "list", "list", "list", "list", "list", "list", "list", + "list", "list", "list", "list", "list", "list", "list", "logical", "list", + "logical", "list", "logical", "logical", "logical", "logical", "list", + "logical", "logical", "logical", "logical", "list", "logical", "list", + "logical", "list", "list", "logical", "list", "logical", "list", + "logical", "list", "logical", "list", "logical", "logical", "logical", "logical", "logical", "logical", "logical", "logical", "logical", "logical", "logical", "logical", "logical", "logical", "logical", "logical", "logical", "logical", "logical", "logical", "logical", @@ -93,5 +97,7 @@ test_that("export_downloader works as expected", { "logical", "logical", "logical", "logical", "logical", "logical", "logical", "logical", "logical", "logical", "logical", "logical", "logical", "logical", "logical", "logical", "logical", "logical", - "logical", "logical", "logical", "list", "list", "list")) + "logical", "logical", "logical", "logical", "logical", "list", "list", + "list") + ) }) From b8344d7d944aa0b8a5fa65cc66cc520554449294 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Fri, 31 Mar 2023 14:46:32 +0100 Subject: [PATCH 180/203] fixed get_all_outbreaks test --- tests/testthat/test-get_all_outbreaks.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-get_all_outbreaks.R b/tests/testthat/test-get_all_outbreaks.R index 06f40f9..4530c3f 100644 --- a/tests/testthat/test-get_all_outbreaks.R +++ b/tests/testthat/test-get_all_outbreaks.R @@ -9,7 +9,7 @@ test_that("get_all_outbreaks works as expected", { expect_s3_class(res, "tbl_df") expect_s3_class(res, "data.frame") - expect_identical(dim(res), c(10L, 5L)) + expect_identical(dim(res), c(11L, 5L)) expect_identical( colnames(res), c("id", "name", "description", "createdBy", "createdAt") From f05251fce23ffdb42f962910d008f7668940e07d Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Fri, 31 Mar 2023 16:14:54 +0100 Subject: [PATCH 181/203] added first draft of get_cases_questionnaire --- R/get_cases_questionnaire.R | 86 ++++++++++++++++++++++++++++++++++ man/get_cases_questionnaire.Rd | 64 +++++++++++++++++++++++++ 2 files changed, 150 insertions(+) create mode 100644 R/get_cases_questionnaire.R create mode 100644 man/get_cases_questionnaire.Rd diff --git a/R/get_cases_questionnaire.R b/R/get_cases_questionnaire.R new file mode 100644 index 0000000..171060e --- /dev/null +++ b/R/get_cases_questionnaire.R @@ -0,0 +1,86 @@ +#' Download cases from Go.Data and returns questionnaire fields +#' +#' A function that retrieves the questionnaire fields from case data for a +#' specific `outbreak_id`. +#' +#' Unlike [`get_cases()`] this function only uses the [`export_downloader()`], +#' and not the [`batch_downloader()`]. Therefore, this function will only work +#' on Go.Data versions 2.38.1 or newer. This method relies on the GET +#' outbreak/{id}/cases/export API endpoint. An export request is submitted to +#' the server, and then when the export is ready, it will be downloaded. +#' +#' This function fixes the file return type to `"csv"`. +#' +#' @param url Insert the base URL for your instance of Go.Data here. +#' Don't forget the forward slash "/" at end! +#' @param username The email address for your Go.Data login. +#' @param password The password for your Go.Data login +#' @param outbreak_id The id number for the outbreak for which you want to +#' download data. +#' @param method The method to download data. `method = "export"` is the +#' preferred and default method for Go.Data version 2.38.1 or later. +#' See Details. +#' @param batch_size If `method = "batches"`, then `batch_size` specifies the +#' number of records to retrieve in each iteration. +#' @param wait If `method = "export"`, then `wait` is the number of seconds to +#' wait in between iterations of checking the status of the export. +#' @param file_type If `method = "export"`, then `file_type` determines Whether +#' the resulting data frame should contain nested fields (`file_type = "json"`, +#' the default) or an entirely flat data structure (`file_type = "csv"`) +#' +#' @return Returns a `tibble`. +#' @export +#' +#' @examples +#' \dontrun{ +#' url <- "https://MyGoDataServer.com/" +#' username <- "myemail@email.com" +#' password <- "mypassword" +#' outbreak_id <- "3b5554d7-2c19-41d0-b9af-475ad25a382b" +#' +#' cases <- get_cases_questionnaire( +#' url = url, +#' username = username, +#' password = password, +#' outbreak_id = outbreak_id +#' ) +#' } +get_cases_questionnaire <- function(url, + username, + password, + outbreak_id, + wait = 2) { + + #Check that outbreak_id is active + active_outbreak_id <- get_active_outbreak( + url = url, + username = username, + password = password + ) + if (outbreak_id != active_outbreak_id) { + set_active_outbreak( + url = url, + username = username, + password = password, + outbreak_id = outbreak_id + ) + } + + #Submit an export request to the system + api_call_request <- paste0( + url, "api/outbreaks/", outbreak_id, "/cases/export" + ) + df <- export_downloader( + url = url, + username = username, + password = password, + api_call_request = api_call_request, + file_type = "csv", + wait = wait + ) + + # subset columns to questionnaire + df_questionnaire <- df[, grep(pattern = "FA", x = colnames(df))] + + return(tibble::as_tibble(df_questionnaire)) +} diff --git a/man/get_cases_questionnaire.Rd b/man/get_cases_questionnaire.Rd new file mode 100644 index 0000000..53c1583 --- /dev/null +++ b/man/get_cases_questionnaire.Rd @@ -0,0 +1,64 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/get_cases_questionnaire.R +\name{get_cases_questionnaire} +\alias{get_cases_questionnaire} +\title{Download cases from Go.Data and returns questionnaire fields} +\usage{ +get_cases_questionnaire(url, username, password, outbreak_id, wait = 2) +} +\arguments{ +\item{url}{Insert the base URL for your instance of Go.Data here. +Don't forget the forward slash "/" at end!} + +\item{username}{The email address for your Go.Data login.} + +\item{password}{The password for your Go.Data login} + +\item{outbreak_id}{The id number for the outbreak for which you want to +download data.} + +\item{wait}{If \code{method = "export"}, then \code{wait} is the number of seconds to +wait in between iterations of checking the status of the export.} + +\item{method}{The method to download data. \code{method = "export"} is the +preferred and default method for Go.Data version 2.38.1 or later. +See Details.} + +\item{batch_size}{If \code{method = "batches"}, then \code{batch_size} specifies the +number of records to retrieve in each iteration.} + +\item{file_type}{If \code{method = "export"}, then \code{file_type} determines Whether +the resulting data frame should contain nested fields (\code{file_type = "json"}, +the default) or an entirely flat data structure (\code{file_type = "csv"})} +} +\value{ +Returns a \code{tibble}. +} +\description{ +A function that retrieves the questionnaire fields from case data for a +specific \code{outbreak_id}. +} +\details{ +Unlike \code{\link[=get_cases]{get_cases()}} this function only uses the \code{\link[=export_downloader]{export_downloader()}}, +and not the \code{\link[=batch_downloader]{batch_downloader()}}. Therefore, this function will only work +on Go.Data versions 2.38.1 or newer. This method relies on the GET +outbreak/{id}/cases/export API endpoint. An export request is submitted to +the server, and then when the export is ready, it will be downloaded. + +This function fixes the file return type to \code{"csv"}. +} +\examples{ +\dontrun{ +url <- "https://MyGoDataServer.com/" +username <- "myemail@email.com" +password <- "mypassword" +outbreak_id <- "3b5554d7-2c19-41d0-b9af-475ad25a382b" + +cases <- get_cases_questionnaire( + url = url, + username = username, + password = password, + outbreak_id = outbreak_id +) +} +} From fbaeeac3b07bc348cf69519478ea31c66b6d139b Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Fri, 31 Mar 2023 16:23:35 +0100 Subject: [PATCH 182/203] updated get_cases_questionnaire documentation --- R/get_cases_questionnaire.R | 8 -------- man/get_cases_questionnaire.Rd | 11 ----------- 2 files changed, 19 deletions(-) diff --git a/R/get_cases_questionnaire.R b/R/get_cases_questionnaire.R index 171060e..2ba125d 100644 --- a/R/get_cases_questionnaire.R +++ b/R/get_cases_questionnaire.R @@ -17,16 +17,8 @@ #' @param password The password for your Go.Data login #' @param outbreak_id The id number for the outbreak for which you want to #' download data. -#' @param method The method to download data. `method = "export"` is the -#' preferred and default method for Go.Data version 2.38.1 or later. -#' See Details. -#' @param batch_size If `method = "batches"`, then `batch_size` specifies the -#' number of records to retrieve in each iteration. #' @param wait If `method = "export"`, then `wait` is the number of seconds to #' wait in between iterations of checking the status of the export. -#' @param file_type If `method = "export"`, then `file_type` determines Whether -#' the resulting data frame should contain nested fields (`file_type = "json"`, -#' the default) or an entirely flat data structure (`file_type = "csv"`) #' #' @return Returns a `tibble`. #' @export diff --git a/man/get_cases_questionnaire.Rd b/man/get_cases_questionnaire.Rd index 53c1583..658dd83 100644 --- a/man/get_cases_questionnaire.Rd +++ b/man/get_cases_questionnaire.Rd @@ -19,17 +19,6 @@ download data.} \item{wait}{If \code{method = "export"}, then \code{wait} is the number of seconds to wait in between iterations of checking the status of the export.} - -\item{method}{The method to download data. \code{method = "export"} is the -preferred and default method for Go.Data version 2.38.1 or later. -See Details.} - -\item{batch_size}{If \code{method = "batches"}, then \code{batch_size} specifies the -number of records to retrieve in each iteration.} - -\item{file_type}{If \code{method = "export"}, then \code{file_type} determines Whether -the resulting data frame should contain nested fields (\code{file_type = "json"}, -the default) or an entirely flat data structure (\code{file_type = "csv"})} } \value{ Returns a \code{tibble}. From e6be6076d08b060a99be1bf94e614ec9c55e1488 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Thu, 6 Apr 2023 15:20:16 +0100 Subject: [PATCH 183/203] updated documentation --- NAMESPACE | 20 +++++++++++++++++++ man/clean_case_vax_history.Rd | 2 +- ...an_contacts_of_contacts_address_history.Rd | 2 +- 3 files changed, 22 insertions(+), 2 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 2120a66..947f78b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,12 +1,32 @@ # Generated by roxygen2: do not edit by hand +export(cases_from_contacts) export(check_godata_url) +export(clean_case_address_history) +export(clean_case_med_history) +export(clean_case_vax_history) +export(clean_cases) +export(clean_contact_address_history) +export(clean_contact_vax_history) +export(clean_contacts) +export(clean_contacts_of_contacts) +export(clean_contacts_of_contacts_address_history) +export(clean_contacts_of_contacts_vax_history) +export(clean_events) +export(clean_followups) +export(clean_locations) +export(clean_relationships) +export(clean_teams) +export(clean_users) +export(contacts_per_case) export(expand_location_tree) +export(exposures_per_case) export(get_access_token) export(get_active_outbreak) export(get_all_outbreaks) export(get_cases) export(get_cases_epiwindow) +export(get_cases_questionnaire) export(get_clusters) export(get_contacts) export(get_contacts_epiwindow) diff --git a/man/clean_case_vax_history.Rd b/man/clean_case_vax_history.Rd index c72e851..95f6570 100644 --- a/man/clean_case_vax_history.Rd +++ b/man/clean_case_vax_history.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/clean_case_vax_history.R \name{clean_case_vax_history} \alias{clean_case_vax_history} -\title{Cleans vaccinations data from case data} +\title{Cleans vaccination data from case data} \usage{ clean_case_vax_history(cases) } diff --git a/man/clean_contacts_of_contacts_address_history.Rd b/man/clean_contacts_of_contacts_address_history.Rd index a4923b4..2896a5f 100644 --- a/man/clean_contacts_of_contacts_address_history.Rd +++ b/man/clean_contacts_of_contacts_address_history.Rd @@ -45,7 +45,7 @@ locations <- get_locations( ) locations_clean <- clean_locations(locations = locations) -contact_of_contacts_address_history <- clean_contacts_of_contacts_address_history( +contact_of_contacts_add_hist <- clean_contacts_of_contacts_address_history( contacts_of_contacts = contacts_of_contacts, locations_clean = locations_clean ) From 7084b7167490257d27e2bf46f53c4ec1064667b4 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Thu, 6 Apr 2023 15:49:24 +0100 Subject: [PATCH 184/203] used translation function in clean_case_address_history --- R/clean_case_address_history.R | 21 ++++++++++++++----- man/clean_case_address_history.Rd | 15 +++++++++++-- .../test-clean_case_address_history.R | 10 ++++++++- 3 files changed, 38 insertions(+), 8 deletions(-) diff --git a/R/clean_case_address_history.R b/R/clean_case_address_history.R index fb7ac72..2be9f22 100644 --- a/R/clean_case_address_history.R +++ b/R/clean_case_address_history.R @@ -8,6 +8,8 @@ #' [`get_cases()`]. #' @param locations_clean A tibble with cleaned locations data. Locations data #' is returned by [`get_locations()`] and cleaned by [`clean_locations()`]. +#' @param language_tokens A tibble of language tokens returned by +#' [`get_language_tokens()`] to translate the string tokens in the data. #' #' @return A tibble with address information from cases data. #' @export @@ -34,13 +36,22 @@ #' #' locations_clean <- clean_locations(locations = locations) #' +#' language_tokens <- get_language_tokens( +#' url = url, +#' username = username, +#' password = password, +#' language = "english_us" +#' ) +#' #' case_address_history <- clean_case_address_history( #' cases = cases, -#' locations_clean = locations_clean +#' locations_clean = locations_clean, +#' language_tokens = language_tokens #' ) #' } clean_case_address_history <- function(cases, - locations_clean) { + locations_clean, + language_tokens) { cases_address_history_clean <- dplyr::filter( .data = cases, @@ -67,9 +78,9 @@ clean_case_address_history <- function(cases, purrr::negate(is.list) ) - cases_address_history_clean <- dplyr::mutate( - .data = cases_address_history_clean, - addresses_typeid = sub(".*TYPE_", "", .data$addresses_typeid) + cases_address_history_clean <- translate_categories( + data = cases_address_history_clean, + language_tokens = language_tokens ) cases_address_history_clean <- dplyr::left_join( diff --git a/man/clean_case_address_history.Rd b/man/clean_case_address_history.Rd index 5c32b1b..cab3aea 100644 --- a/man/clean_case_address_history.Rd +++ b/man/clean_case_address_history.Rd @@ -4,7 +4,7 @@ \alias{clean_case_address_history} \title{Extract address information from case data} \usage{ -clean_case_address_history(cases, locations_clean) +clean_case_address_history(cases, locations_clean, language_tokens) } \arguments{ \item{cases}{A tibble with case data. Case data is returned by @@ -12,6 +12,9 @@ clean_case_address_history(cases, locations_clean) \item{locations_clean}{A tibble with cleaned locations data. Locations data is returned by \code{\link[=get_locations]{get_locations()}} and cleaned by \code{\link[=clean_locations]{clean_locations()}}.} + +\item{language_tokens}{A tibble of language tokens returned by +\code{\link[=get_language_tokens]{get_language_tokens()}} to translate the string tokens in the data.} } \value{ A tibble with address information from cases data. @@ -43,9 +46,17 @@ locations <- get_locations( locations_clean <- clean_locations(locations = locations) +language_tokens <- get_language_tokens( + url = url, + username = username, + password = password, + language = "english_us" +) + case_address_history <- clean_case_address_history( cases = cases, - locations_clean = locations_clean + locations_clean = locations_clean, + language_tokens = language_tokens ) } } diff --git a/tests/testthat/test-clean_case_address_history.R b/tests/testthat/test-clean_case_address_history.R index f1ed978..97a1363 100644 --- a/tests/testthat/test-clean_case_address_history.R +++ b/tests/testthat/test-clean_case_address_history.R @@ -15,9 +15,17 @@ test_that("clean_case_address_history works as expected", { ) locations_clean <- clean_locations(locations = locations) + language_tokens <- get_language_tokens( + url = url, + username = username, + password = password, + language = "english_us" + ) + res <- clean_case_address_history( cases = cases, - locations_clean = locations_clean + locations_clean = locations_clean, + language_tokens = language_tokens ) expect_s3_class(res, "tbl_df") From fe63123fb1da4cb8ebe12534235d8a6933caf078 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Thu, 6 Apr 2023 15:57:30 +0100 Subject: [PATCH 185/203] used translation function in clean_case_med_history --- R/clean_case_med_history.R | 28 +++++++++++++------- man/clean_case_med_history.Rd | 17 ++++++++++-- tests/testthat/test-clean_case_med_history.R | 15 +++++++++-- 3 files changed, 46 insertions(+), 14 deletions(-) diff --git a/R/clean_case_med_history.R b/R/clean_case_med_history.R index 82c335e..c9d802b 100644 --- a/R/clean_case_med_history.R +++ b/R/clean_case_med_history.R @@ -5,6 +5,8 @@ #' #' @param cases A tibble with case data. Case data is returned by #' [`get_cases()`]. +#' @param language_tokens A tibble of language tokens returned by +#' [`get_language_tokens()`] to translate the string tokens in the data. #' #' @return A tibble with information on isolation and hospitalization history. #' @export @@ -23,9 +25,20 @@ #' outbreak_id = outbreak_id #' ) #' -#' cases_med_history <- clean_case_med_history(cases = cases) +#' language_tokens <- get_language_tokens( +#' url = url, +#' username = username, +#' password = password, +#' language = "english_us" +#' ) +#' +#' cases_med_history <- clean_case_med_history( +#' cases = cases, +#' language_tokens = language_tokens +#' ) #' } -clean_case_med_history <- function(cases) { +clean_case_med_history <- function(cases, + language_tokens) { cases_dateranges_history_clean <- dplyr::filter( .data = cases, @@ -54,14 +67,9 @@ clean_case_med_history <- function(cases) { tolower ) - cases_dateranges_history_clean <- dplyr::mutate( - .data = cases_dateranges_history_clean, - dateranges_typeid = sub(".*TYPE_", "", dateranges_typeid) - ) - - cases_dateranges_history_clean <- dplyr::mutate( - .data = cases_dateranges_history_clean, - dateranges_centername = sub(".*NAME_", "", dateranges_centername) + cases_dateranges_history_clean <- translate_categories( + data = cases_dateranges_history_clean, + language_tokens = language_tokens ) cases_dateranges_history_clean <- dplyr::mutate_at( diff --git a/man/clean_case_med_history.Rd b/man/clean_case_med_history.Rd index bf1fa66..120ce53 100644 --- a/man/clean_case_med_history.Rd +++ b/man/clean_case_med_history.Rd @@ -4,11 +4,14 @@ \alias{clean_case_med_history} \title{Extracts and cleans medical history from case data} \usage{ -clean_case_med_history(cases) +clean_case_med_history(cases, language_tokens) } \arguments{ \item{cases}{A tibble with case data. Case data is returned by \code{\link[=get_cases]{get_cases()}}.} + +\item{language_tokens}{A tibble of language tokens returned by +\code{\link[=get_language_tokens]{get_language_tokens()}} to translate the string tokens in the data.} } \value{ A tibble with information on isolation and hospitalization history. @@ -31,6 +34,16 @@ cases <- get_cases( outbreak_id = outbreak_id ) -cases_med_history <- clean_case_med_history(cases = cases) +language_tokens <- get_language_tokens( + url = url, + username = username, + password = password, + language = "english_us" +) + +cases_med_history <- clean_case_med_history( + cases = cases, + language_tokens = language_tokens +) } } diff --git a/tests/testthat/test-clean_case_med_history.R b/tests/testthat/test-clean_case_med_history.R index 895dcd0..55cd50d 100644 --- a/tests/testthat/test-clean_case_med_history.R +++ b/tests/testthat/test-clean_case_med_history.R @@ -1,13 +1,24 @@ test_that("clean_case_med_history works as expected", { skip("get_cases requires API call") - res <- get_cases( + cases <- get_cases( url = url, username = username, password = password, outbreak_id = outbreak_id ) - res <- clean_case_med_history(cases = res) + + language_tokens <- get_language_tokens( + url = url, + username = username, + password = password, + language = "english_us" + ) + + res <- clean_case_med_history( + cases = cases, + language_tokens = language_tokens + ) expect_s3_class(res, "tbl_df") expect_s3_class(res, "data.frame") From 7ed2c2544f4bb49de56b963572cc9ada1ea50116 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Thu, 6 Apr 2023 16:04:11 +0100 Subject: [PATCH 186/203] added translation function to clean_case_vax_history --- R/clean_case_vax_history.R | 28 +++++++++++++------- man/clean_case_vax_history.Rd | 17 ++++++++++-- tests/testthat/test-clean_case_vax_history.R | 15 +++++++++-- 3 files changed, 46 insertions(+), 14 deletions(-) diff --git a/R/clean_case_vax_history.R b/R/clean_case_vax_history.R index d70f9c8..3c30f58 100644 --- a/R/clean_case_vax_history.R +++ b/R/clean_case_vax_history.R @@ -4,6 +4,8 @@ #' complete, from case data. Case data is returned from [`get_cases()`]. #' #' @param cases A tibble with address information from cases data. +#' @param language_tokens A tibble of language tokens returned by +#' [`get_language_tokens()`] to translate the string tokens in the data. #' #' @return A tibble with cleaned and un-nested vaccination history data. #' @export @@ -22,9 +24,20 @@ #' outbreak_id = outbreak_id #' ) #' -#' vax_history <- clean_case_vax_history(cases = cases) +#' language_tokens <- get_language_tokens( +#' url = url, +#' username = username, +#' password = password, +#' language = "english_us" +#' ) +#' +#' vax_history <- clean_case_vax_history( +#' cases = cases, +#' language_tokens = language_tokens +#' ) #' } -clean_case_vax_history <- function(cases) { +clean_case_vax_history <- function(cases, + language_tokens) { cases_vacc_history_clean <- dplyr::filter( .data = cases, @@ -59,14 +72,9 @@ clean_case_vax_history <- function(cases) { .fn = tolower ) - cases_vacc_history_clean <- dplyr::mutate( - .data = cases_vacc_history_clean, - vaccinesreceived_vaccine = sub(".*VACCINE_", "", vaccinesreceived_vaccine) - ) - - cases_vacc_history_clean <- dplyr::mutate( - .data = cases_vacc_history_clean, - vaccinesreceived_status = sub(".*STATUS_", "", vaccinesreceived_status) + cases_vacc_history_clean <- translate_categories( + data = cases_vacc_history_clean, + language_tokens = language_tokens ) cases_vacc_history_clean <- dplyr::mutate( diff --git a/man/clean_case_vax_history.Rd b/man/clean_case_vax_history.Rd index 95f6570..710d73d 100644 --- a/man/clean_case_vax_history.Rd +++ b/man/clean_case_vax_history.Rd @@ -4,10 +4,13 @@ \alias{clean_case_vax_history} \title{Cleans vaccination data from case data} \usage{ -clean_case_vax_history(cases) +clean_case_vax_history(cases, language_tokens) } \arguments{ \item{cases}{A tibble with address information from cases data.} + +\item{language_tokens}{A tibble of language tokens returned by +\code{\link[=get_language_tokens]{get_language_tokens()}} to translate the string tokens in the data.} } \value{ A tibble with cleaned and un-nested vaccination history data. @@ -30,6 +33,16 @@ cases <- get_cases( outbreak_id = outbreak_id ) -vax_history <- clean_case_vax_history(cases = cases) +language_tokens <- get_language_tokens( + url = url, + username = username, + password = password, + language = "english_us" +) + +vax_history <- clean_case_vax_history( + cases = cases, + language_tokens = language_tokens +) } } diff --git a/tests/testthat/test-clean_case_vax_history.R b/tests/testthat/test-clean_case_vax_history.R index c2b9fc2..6372f45 100644 --- a/tests/testthat/test-clean_case_vax_history.R +++ b/tests/testthat/test-clean_case_vax_history.R @@ -1,13 +1,24 @@ test_that("clean_case_vax_history works as expected", { skip("get_cases requires API call") - res <- get_cases( + cases <- get_cases( url = url, username = username, password = password, outbreak_id = outbreak_id ) - res <- clean_case_vax_history(cases = res) + + language_tokens <- get_language_tokens( + url = url, + username = username, + password = password, + language = "english_us" + ) + + res <- clean_case_vax_history( + cases = cases, + language_tokens = language_tokens + ) expect_s3_class(res, "tbl_df") expect_s3_class(res, "data.frame") From 4e27174ef2ccc3dfaab4a5995a111a2e19e5767a Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Thu, 6 Apr 2023 16:33:22 +0100 Subject: [PATCH 187/203] used translation function in clean_cases --- R/clean_cases.R | 64 +++++++++++++++++++++++-------- man/clean_cases.Rd | 42 +++++++++++++++++--- tests/testthat/test-clean_cases.R | 23 +++++++++-- 3 files changed, 103 insertions(+), 26 deletions(-) diff --git a/R/clean_cases.R b/R/clean_cases.R index 1f00853..27932ed 100644 --- a/R/clean_cases.R +++ b/R/clean_cases.R @@ -4,6 +4,10 @@ #' [`get_cases()`]. #' #' @param cases A `tibble` containing the case data. +#' @param locations_clean A tibble with cleaned locations data. Locations data +#' is returned by [`get_locations()`] and cleaned by [`clean_locations()`]. +#' @param language_tokens A tibble of language tokens returned by +#' [`get_language_tokens()`] to translate the string tokens in the data. #' #' @return A `tibble` containing the cleaned case data. #' @export @@ -22,22 +26,48 @@ #' outbreak_id = outbreak_id #' ) #' +#' language_tokens <- get_language_tokens( +#' url = url, +#' username = username, +#' password = password, +#' language = "english_us" +#' ) +#' +#' locations <- get_locations( +#' url = url, +#' username = username, +#' password = password +#' ) +#' locations_clean <- clean_locations(locations = locations) +#' #' # other cleaned data required for `clean_cases()` -#' cases_vacc_history_clean <- clean_case_vax_history(cases = cases) -#' cases_address_history_clean <- clean_case_address_history(cases = cases) -#' cases_dateranges_history_clean <- clean_case_med_history(cases = cases) +#' cases_vacc_history_clean <- clean_case_vax_history( +#' cases = cases, +#' language_tokens = language_tokens +#' ) +#' cases_address_history_clean <- clean_case_address_history( +#' cases = cases, +#' locations_clean = locations_clean, +#' language_tokens = language_tokens +#' ) +#' cases_dateranges_history_clean <- clean_case_med_history( +#' cases = cases, +#' language_tokens = language_tokens +#' ) #' #' cases_clean <- clean_cases( #' cases = cases, #' cases_address_history_clean = cases_address_history_clean, #' cases_vacc_history_clean = cases_vacc_history_clean, -#' cases_dateranges_history_clean = cases_dateranges_history_clean +#' cases_dateranges_history_clean = cases_dateranges_history_clean, +#' language_tokens = language_tokens #' ) #' } clean_cases <- function(cases, cases_address_history_clean, cases_vacc_history_clean, - cases_dateranges_history_clean) { + cases_dateranges_history_clean, + language_tokens) { # Remove all deleted records cases_clean <- dplyr::filter( @@ -100,35 +130,35 @@ clean_cases <- function(cases, datetime_created_at = as.POSIXct(datetime_created_at,format="%Y-%m-%dT%H:%M") ) - # truncate responses of categorical vars so easier to read - cases_clean <- dplyr::mutate( + # translate responses of categorical vars so easier to read + cases_clean <- translate_categories( + data = cases_clean, + language_tokens = language_tokens + ) + + cases_clean <- dplyr::rename( .data = cases_clean, - classification = sub(".*CLASSIFICATION_", "", classification), - gender = sub(".*GENDER_", "", gender), - occupation = sub(".*OCCUPATION_", "", occupation), - outcome = sub(".*OUTCOME_", "", outcome_id), - pregnancy_status = sub(".*STATUS_", "", pregnancy_status), - risk_level = sub(".*LEVEL_", "", risk_level) + outcome = "outcome_id" ) cases_clean <- dplyr::mutate( .data = cases_clean, - isolated = dplyr::case_when(id %in% cases_dateranges_history_clean$id[cases_dateranges_history_clean$dateranges_typeid == "ISOLATION"] ~ TRUE, TRUE ~ FALSE) + isolated = dplyr::case_when(id %in% cases_dateranges_history_clean$id[cases_dateranges_history_clean$dateranges_typeid == "Isolation"] ~ TRUE, TRUE ~ FALSE) ) cases_clean <- dplyr::mutate( .data = cases_clean, - hospitalized = dplyr::case_when(id %in% cases_dateranges_history_clean$id[cases_dateranges_history_clean$dateranges_typeid == "HOSPITALIZATION"] ~ TRUE, TRUE ~ FALSE) + hospitalized = dplyr::case_when(id %in% cases_dateranges_history_clean$id[cases_dateranges_history_clean$dateranges_typeid == "Hospitalization"] ~ TRUE, TRUE ~ FALSE) ) cases_clean <- dplyr::mutate( .data = cases_clean, - icu = dplyr::case_when(id %in% cases_dateranges_history_clean$id[cases_dateranges_history_clean$dateranges_typeid == "ICU_ADMISSION"] ~ TRUE, TRUE ~ FALSE) + icu = dplyr::case_when(id %in% cases_dateranges_history_clean$id[cases_dateranges_history_clean$dateranges_typeid == "ICU Admission"] ~ TRUE, TRUE ~ FALSE) ) cases_address_history_clean <- dplyr::filter( .data = cases_address_history_clean, - addresses_typeid == "USUAL_PLACE_OF_RESIDENCE" + addresses_typeid == "Current address" ) # join in current address from address history, only current place of residence diff --git a/man/clean_cases.Rd b/man/clean_cases.Rd index 96b9a54..4e86764 100644 --- a/man/clean_cases.Rd +++ b/man/clean_cases.Rd @@ -8,11 +8,18 @@ clean_cases( cases, cases_address_history_clean, cases_vacc_history_clean, - cases_dateranges_history_clean + cases_dateranges_history_clean, + language_tokens ) } \arguments{ \item{cases}{A \code{tibble} containing the case data.} + +\item{language_tokens}{A tibble of language tokens returned by +\code{\link[=get_language_tokens]{get_language_tokens()}} to translate the string tokens in the data.} + +\item{locations_clean}{A tibble with cleaned locations data. Locations data +is returned by \code{\link[=get_locations]{get_locations()}} and cleaned by \code{\link[=clean_locations]{clean_locations()}}.} } \value{ A \code{tibble} containing the cleaned case data. @@ -35,16 +42,41 @@ cases <- get_cases( outbreak_id = outbreak_id ) +language_tokens <- get_language_tokens( + url = url, + username = username, + password = password, + language = "english_us" +) + +locations <- get_locations( + url = url, + username = username, + password = password +) +locations_clean <- clean_locations(locations = locations) + # other cleaned data required for `clean_cases()` -cases_vacc_history_clean <- clean_case_vax_history(cases = cases) -cases_address_history_clean <- clean_case_address_history(cases = cases) -cases_dateranges_history_clean <- clean_case_med_history(cases = cases) +cases_vacc_history_clean <- clean_case_vax_history( + cases = cases, + language_tokens = language_tokens +) +cases_address_history_clean <- clean_case_address_history( + cases = cases, + locations_clean = locations_clean, + language_tokens = language_tokens +) +cases_dateranges_history_clean <- clean_case_med_history( + cases = cases, + language_tokens = language_tokens +) cases_clean <- clean_cases( cases = cases, cases_address_history_clean = cases_address_history_clean, cases_vacc_history_clean = cases_vacc_history_clean, - cases_dateranges_history_clean = cases_dateranges_history_clean + cases_dateranges_history_clean = cases_dateranges_history_clean, + language_tokens = language_tokens ) } } diff --git a/tests/testthat/test-clean_cases.R b/tests/testthat/test-clean_cases.R index a98f56b..8a7445d 100644 --- a/tests/testthat/test-clean_cases.R +++ b/tests/testthat/test-clean_cases.R @@ -8,6 +8,13 @@ test_that("clean_cases works as expected", { outbreak_id = outbreak_id ) + language_tokens <- get_language_tokens( + url = url, + username = username, + password = password, + language = "english_us" + ) + locations <- get_locations( url = url, username = username, @@ -16,18 +23,26 @@ test_that("clean_cases works as expected", { locations_clean <- clean_locations(locations = locations) - cases_vacc_history_clean <- clean_case_vax_history(cases = cases) + cases_vacc_history_clean <- clean_case_vax_history( + cases = cases, + language_tokens = language_tokens + ) cases_address_history_clean <- clean_case_address_history( cases = cases, - locations_clean = locations_clean + locations_clean = locations_clean, + language_tokens = language_tokens + ) + cases_dateranges_history_clean <- clean_case_med_history( + cases = cases, + language_tokens = language_tokens ) - cases_dateranges_history_clean <- clean_case_med_history(cases = cases) res <- clean_cases( cases = cases, cases_address_history_clean = cases_address_history_clean, cases_vacc_history_clean = cases_vacc_history_clean, - cases_dateranges_history_clean = cases_dateranges_history_clean + cases_dateranges_history_clean = cases_dateranges_history_clean, + language_tokens = language_tokens ) expect_s3_class(res, "tbl_df") From 5bdef5442dcb1840c4e27fa65ca2c35fbad28da7 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Thu, 6 Apr 2023 16:50:39 +0100 Subject: [PATCH 188/203] used translation function in clean_contact_address_history --- R/clean_contact_address_history.R | 21 ++++++++++++++----- man/clean_contact_address_history.Rd | 15 +++++++++++-- .../test-clean_contact_address_history.R | 12 +++++++++-- 3 files changed, 39 insertions(+), 9 deletions(-) diff --git a/R/clean_contact_address_history.R b/R/clean_contact_address_history.R index 15222b2..754cae6 100644 --- a/R/clean_contact_address_history.R +++ b/R/clean_contact_address_history.R @@ -8,6 +8,8 @@ #' [`get_contacts()`]. #' @param locations_clean A tibble with cleaned locations data. Locations data #' is returned by [`get_locations()`] and cleaned by [`clean_locations()`]. +#' @param language_tokens A tibble of language tokens returned by +#' [`get_language_tokens()`] to translate the string tokens in the data. #' #' @return A tibble with address information from contacts data. #' @export @@ -33,13 +35,22 @@ #' ) #' locations_clean <- clean_locations(locations = locations) #' +#' language_tokens <- get_language_tokens( +#' url = url, +#' username = username, +#' password = password, +#' language = "english_us" +#' ) +#' #' contact_address_history <- clean_contact_address_history( #' contacts = contacts, -#' locations_clean = locations_clean +#' locations_clean = locations_clean, +#' language_tokens = language_tokens #' ) #' } clean_contact_address_history <- function(contacts, - locations_clean) { + locations_clean, + language_tokens) { contacts_address_history_clean <- dplyr::filter( .data = contacts, @@ -67,9 +78,9 @@ clean_contact_address_history <- function(contacts, purrr::negate(is.list) ) - contacts_address_history_clean <- dplyr::mutate( - .data = contacts_address_history_clean, - addresses_typeid = sub(".*TYPE_", "", addresses_typeid) + contacts_address_history_clean <- translate_categories( + data = contacts_address_history_clean, + language_tokens = language_tokens ) contacts_address_history_clean <- dplyr::left_join( diff --git a/man/clean_contact_address_history.Rd b/man/clean_contact_address_history.Rd index 1209f5a..16b3c8e 100644 --- a/man/clean_contact_address_history.Rd +++ b/man/clean_contact_address_history.Rd @@ -4,7 +4,7 @@ \alias{clean_contact_address_history} \title{Extracts address information from contact data} \usage{ -clean_contact_address_history(contacts, locations_clean) +clean_contact_address_history(contacts, locations_clean, language_tokens) } \arguments{ \item{contacts}{A tibble with contacts data. Contacts data is returned by @@ -12,6 +12,9 @@ clean_contact_address_history(contacts, locations_clean) \item{locations_clean}{A tibble with cleaned locations data. Locations data is returned by \code{\link[=get_locations]{get_locations()}} and cleaned by \code{\link[=clean_locations]{clean_locations()}}.} + +\item{language_tokens}{A tibble of language tokens returned by +\code{\link[=get_language_tokens]{get_language_tokens()}} to translate the string tokens in the data.} } \value{ A tibble with address information from contacts data. @@ -42,9 +45,17 @@ locations <- get_locations( ) locations_clean <- clean_locations(locations = locations) +language_tokens <- get_language_tokens( + url = url, + username = username, + password = password, + language = "english_us" +) + contact_address_history <- clean_contact_address_history( contacts = contacts, - locations_clean = locations_clean + locations_clean = locations_clean, + language_tokens = language_tokens ) } } diff --git a/tests/testthat/test-clean_contact_address_history.R b/tests/testthat/test-clean_contact_address_history.R index 50930ab..d5786f0 100644 --- a/tests/testthat/test-clean_contact_address_history.R +++ b/tests/testthat/test-clean_contact_address_history.R @@ -15,14 +15,22 @@ test_that("clean_contact_address_history works as expected", { ) locations_clean <- clean_locations(locations = locations) + language_tokens <- get_language_tokens( + url = url, + username = username, + password = password, + language = "english_us" + ) + res <- clean_contact_address_history( contacts = contacts, - locations_clean = locations_clean + locations_clean = locations_clean, + language_tokens = language_tokens ) expect_s3_class(res, "tbl_df") expect_s3_class(res, "data.frame") - expect_identical(dim(res), c(15L, 13L)) + expect_identical(dim(res), c(16L, 13L)) expect_true( all(c( "id", "addresses_locationid", "addresses_typeid", "lat", "long", From c9460d5cf4fa7a4d3fb0a51fe1a4dee8f6b32573 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Thu, 6 Apr 2023 16:56:49 +0100 Subject: [PATCH 189/203] used translation function in clean_contact_vax_history --- R/clean_contact_vax_history.R | 28 ++++++++++++------- man/clean_contact_vax_history.Rd | 17 +++++++++-- .../testthat/test-clean_contact_vax_history.R | 12 +++++++- 3 files changed, 44 insertions(+), 13 deletions(-) diff --git a/R/clean_contact_vax_history.R b/R/clean_contact_vax_history.R index 32ea9da..3125dfc 100644 --- a/R/clean_contact_vax_history.R +++ b/R/clean_contact_vax_history.R @@ -5,6 +5,8 @@ #' [`get_contacts()`]. #' #' @param contacts A tibble with address information from contact data. +#' @param language_tokens A tibble of language tokens returned by +#' [`get_language_tokens()`] to translate the string tokens in the data. #' #' @return A tibble with cleaned and un-nested vaccination history data. #' @export @@ -23,9 +25,20 @@ #' outbreak_id = outbreak_id #' ) #' -#' vax_history <- clean_contact_vax_history(contacts = contacts) +#' language_tokens <- get_language_tokens( +#' url = url, +#' username = username, +#' password = password, +#' language = "english_us" +#' ) +#' +#' vax_history <- clean_contact_vax_history( +#' contacts = contacts, +#' language_tokens = language_tokens +#' ) #' } -clean_contact_vax_history <- function(contacts) { +clean_contact_vax_history <- function(contacts, + language_tokens) { contacts_vax_history_clean <- dplyr::filter( .data = contacts, @@ -58,14 +71,9 @@ clean_contact_vax_history <- function(contacts) { .funs = tolower ) - contacts_vax_history_clean <- dplyr::mutate( - .data = contacts_vax_history_clean, - vaccinesreceived_vaccine = sub(".*VACCINE_", "", vaccinesreceived_vaccine) - ) - - contacts_vax_history_clean <- dplyr::mutate( - .data = contacts_vax_history_clean, - vaccinesreceived_status = sub(".*STATUS_", "", vaccinesreceived_status) + contacts_vax_history_clean <- translate_categories( + data = contacts_vax_history_clean, + language_tokens = language_tokens ) contacts_vax_history_clean <- dplyr::mutate_at( diff --git a/man/clean_contact_vax_history.Rd b/man/clean_contact_vax_history.Rd index a24cd21..c7e49f3 100644 --- a/man/clean_contact_vax_history.Rd +++ b/man/clean_contact_vax_history.Rd @@ -4,10 +4,13 @@ \alias{clean_contact_vax_history} \title{Cleans vaccination data from contact data} \usage{ -clean_contact_vax_history(contacts) +clean_contact_vax_history(contacts, language_tokens) } \arguments{ \item{contacts}{A tibble with address information from contact data.} + +\item{language_tokens}{A tibble of language tokens returned by +\code{\link[=get_language_tokens]{get_language_tokens()}} to translate the string tokens in the data.} } \value{ A tibble with cleaned and un-nested vaccination history data. @@ -31,6 +34,16 @@ contacts <- get_contacts( outbreak_id = outbreak_id ) -vax_history <- clean_contact_vax_history(contacts = contacts) +language_tokens <- get_language_tokens( + url = url, + username = username, + password = password, + language = "english_us" +) + +vax_history <- clean_contact_vax_history( + contacts = contacts, + language_tokens = language_tokens +) } } diff --git a/tests/testthat/test-clean_contact_vax_history.R b/tests/testthat/test-clean_contact_vax_history.R index 3b53f8e..a632cbf 100644 --- a/tests/testthat/test-clean_contact_vax_history.R +++ b/tests/testthat/test-clean_contact_vax_history.R @@ -8,7 +8,17 @@ test_that("clean_contact_vax_history works as expected", { outbreak_id = outbreak_id ) - res <- clean_contact_vax_history(contacts = contacts) + language_tokens <- get_language_tokens( + url = url, + username = username, + password = password, + language = "english_us" + ) + + res <- clean_contact_vax_history( + contacts = contacts, + language_tokens = language_tokens + ) expect_s3_class(res, "tbl_df") expect_s3_class(res, "data.frame") From 3316a804c0d2a6c71016e8eb25ea7128fa1e0f7c Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Thu, 6 Apr 2023 17:03:48 +0100 Subject: [PATCH 190/203] used translation function in clean_contacts_of_contacts_address_history --- ...ean_contacts_of_contacts_address_history.R | 21 ++++++++++++++----- ...an_contacts_of_contacts_address_history.Rd | 16 ++++++++++++-- ...ean_contacts_of_contacts_address_history.R | 10 ++++++++- 3 files changed, 39 insertions(+), 8 deletions(-) diff --git a/R/clean_contacts_of_contacts_address_history.R b/R/clean_contacts_of_contacts_address_history.R index b749a71..8bca0d2 100644 --- a/R/clean_contacts_of_contacts_address_history.R +++ b/R/clean_contacts_of_contacts_address_history.R @@ -8,6 +8,8 @@ #' Contacts of contacts data is returned by [`get_contacts_of_contacts()`]. #' @param locations_clean A `tibble` with cleaned location data. Location data #' is returned by [`get_locations()`] and cleaned by [`clean_locations()`]. +#' @param language_tokens A tibble of language tokens returned by +#' [`get_language_tokens()`] to translate the string tokens in the data. #' #' @return A `tibble` with address information from contacts of contacts data. #' @export @@ -33,13 +35,22 @@ #' ) #' locations_clean <- clean_locations(locations = locations) #' +#' language_tokens <- get_language_tokens( +#' url = url, +#' username = username, +#' password = password, +#' language = "english_us" +#' ) +#' #' contact_of_contacts_add_hist <- clean_contacts_of_contacts_address_history( #' contacts_of_contacts = contacts_of_contacts, -#' locations_clean = locations_clean +#' locations_clean = locations_clean, +#' language_tokens = language_tokens #' ) #' } clean_contacts_of_contacts_address_history <- function(contacts_of_contacts, - locations_clean) { + locations_clean, + language_tokens) { coc_add_hist <- dplyr::filter( .data = contacts_of_contacts, @@ -67,9 +78,9 @@ clean_contacts_of_contacts_address_history <- function(contacts_of_contacts, .predicate = purrr::negate(is.list) ) - coc_add_hist <- dplyr::mutate( - .data = coc_add_hist, - addresses_typeid = sub(".*TYPE_", "", addresses_typeid) + coc_add_hist <- translate_categories( + data = coc_add_hist, + language_tokens = language_tokens ) coc_add_hist <- dplyr::left_join( diff --git a/man/clean_contacts_of_contacts_address_history.Rd b/man/clean_contacts_of_contacts_address_history.Rd index 2896a5f..92b1a46 100644 --- a/man/clean_contacts_of_contacts_address_history.Rd +++ b/man/clean_contacts_of_contacts_address_history.Rd @@ -6,7 +6,8 @@ \usage{ clean_contacts_of_contacts_address_history( contacts_of_contacts, - locations_clean + locations_clean, + language_tokens ) } \arguments{ @@ -15,6 +16,9 @@ Contacts of contacts data is returned by \code{\link[=get_contacts_of_contacts]{ \item{locations_clean}{A \code{tibble} with cleaned location data. Location data is returned by \code{\link[=get_locations]{get_locations()}} and cleaned by \code{\link[=clean_locations]{clean_locations()}}.} + +\item{language_tokens}{A tibble of language tokens returned by +\code{\link[=get_language_tokens]{get_language_tokens()}} to translate the string tokens in the data.} } \value{ A \code{tibble} with address information from contacts of contacts data. @@ -45,9 +49,17 @@ locations <- get_locations( ) locations_clean <- clean_locations(locations = locations) +language_tokens <- get_language_tokens( + url = url, + username = username, + password = password, + language = "english_us" +) + contact_of_contacts_add_hist <- clean_contacts_of_contacts_address_history( contacts_of_contacts = contacts_of_contacts, - locations_clean = locations_clean + locations_clean = locations_clean, + language_tokens = language_tokens ) } } diff --git a/tests/testthat/test-clean_contacts_of_contacts_address_history.R b/tests/testthat/test-clean_contacts_of_contacts_address_history.R index c8a0589..0314c4e 100644 --- a/tests/testthat/test-clean_contacts_of_contacts_address_history.R +++ b/tests/testthat/test-clean_contacts_of_contacts_address_history.R @@ -15,9 +15,17 @@ test_that("clean_contacts_of_contacts_address_history works as expected", { ) locations_clean <- clean_locations(locations = locations) + language_tokens <- get_language_tokens( + url = url, + username = username, + password = password, + language = "english_us" + ) + res <- clean_contacts_of_contacts_address_history( contacts_of_contacts = contacts_of_contacts, - locations_clean = locations_clean + locations_clean = locations_clean, + language_tokens = language_tokens ) expect_s3_class(res, "tbl_df") From fe7009dc36ac8dc9b636b9e9e40325ec2fe1cf17 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Thu, 6 Apr 2023 17:11:14 +0100 Subject: [PATCH 191/203] used translation function in clean_contacts_of_contacts_vax_history --- R/clean_contacts_of_contacts_vax_history.R | 26 ++++++++++++------- man/clean_contacts_of_contacts_vax_history.Rd | 15 +++++++++-- ...t-clean_contacts_of_contacts_vax_history.R | 10 ++++++- 3 files changed, 38 insertions(+), 13 deletions(-) diff --git a/R/clean_contacts_of_contacts_vax_history.R b/R/clean_contacts_of_contacts_vax_history.R index 44e75c7..10c8244 100644 --- a/R/clean_contacts_of_contacts_vax_history.R +++ b/R/clean_contacts_of_contacts_vax_history.R @@ -6,6 +6,8 @@ #' #' @param contacts_of_contacts A `tibble` with address information from contacts #' of contacts data. +#' @param language_tokens A tibble of language tokens returned by +#' [`get_language_tokens()`] to translate the string tokens in the data. #' #' @return A `tibble` with cleaned and un-nested vaccination history data. #' @export @@ -24,11 +26,20 @@ #' outbreak_id = outbreak_id #' ) #' +#' language_tokens <- get_language_tokens( +#' url = url, +#' username = username, +#' password = password, +#' language = "english_us" +#' ) +#' #' vax_history <- clean_contacts_of_contacts_vax_history( -#' contacts_of_contacts = contacts_of_contacts +#' contacts_of_contacts = contacts_of_contacts, +#' language_tokens = language_tokens #' ) #' } -clean_contacts_of_contacts_vax_history <- function(contacts_of_contacts) { +clean_contacts_of_contacts_vax_history <- function(contacts_of_contacts, + language_tokens) { coc_vacc_hist <- dplyr::filter( .data = contacts_of_contacts, @@ -47,14 +58,9 @@ clean_contacts_of_contacts_vax_history <- function(contacts_of_contacts) { tolower ) - coc_vacc_hist <- dplyr::mutate( - .data = coc_vacc_hist, - vaccinesreceived_vaccine = sub(".*VACCINE_", "", vaccinesreceived_vaccine) - ) - - coc_vacc_hist <- dplyr::mutate( - .data = coc_vacc_hist, - vaccinesreceived_status = sub(".*STATUS_", "", vaccinesreceived_status) + coc_vacc_hist <- translate_categories( + data = coc_vacc_hist, + language_tokens = language_tokens ) coc_vacc_hist <- dplyr::mutate_at( diff --git a/man/clean_contacts_of_contacts_vax_history.Rd b/man/clean_contacts_of_contacts_vax_history.Rd index 43cb262..fd355e1 100644 --- a/man/clean_contacts_of_contacts_vax_history.Rd +++ b/man/clean_contacts_of_contacts_vax_history.Rd @@ -4,11 +4,14 @@ \alias{clean_contacts_of_contacts_vax_history} \title{Cleans vaccination data from contacts of contacts data} \usage{ -clean_contacts_of_contacts_vax_history(contacts_of_contacts) +clean_contacts_of_contacts_vax_history(contacts_of_contacts, language_tokens) } \arguments{ \item{contacts_of_contacts}{A \code{tibble} with address information from contacts of contacts data.} + +\item{language_tokens}{A tibble of language tokens returned by +\code{\link[=get_language_tokens]{get_language_tokens()}} to translate the string tokens in the data.} } \value{ A \code{tibble} with cleaned and un-nested vaccination history data. @@ -32,8 +35,16 @@ contacts_of_contacts <- get_contacts_of_contacts( outbreak_id = outbreak_id ) +language_tokens <- get_language_tokens( + url = url, + username = username, + password = password, + language = "english_us" +) + vax_history <- clean_contacts_of_contacts_vax_history( - contacts_of_contacts = contacts_of_contacts + contacts_of_contacts = contacts_of_contacts, + language_tokens = language_tokens ) } } diff --git a/tests/testthat/test-clean_contacts_of_contacts_vax_history.R b/tests/testthat/test-clean_contacts_of_contacts_vax_history.R index e537ec7..425c116 100644 --- a/tests/testthat/test-clean_contacts_of_contacts_vax_history.R +++ b/tests/testthat/test-clean_contacts_of_contacts_vax_history.R @@ -8,8 +8,16 @@ test_that("clean_contacts_of_contacts_vax_history works as expected", { outbreak_id = outbreak_id ) + language_tokens <- get_language_tokens( + url = url, + username = username, + password = password, + language = "english_us" + ) + res <- clean_contacts_of_contacts_vax_history( - contacts_of_contacts = contacts_of_contacts + contacts_of_contacts = contacts_of_contacts, + language_tokens = language_tokens ) expect_s3_class(res, "tbl_df") From ae4a7e58d38818fa9511253ccb31d6b58da1c9ec Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Thu, 6 Apr 2023 17:17:57 +0100 Subject: [PATCH 192/203] removing extra #' in clean_case_vax_history --- R/clean_case_vax_history.R | 2 +- man/clean_case_vax_history.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/clean_case_vax_history.R b/R/clean_case_vax_history.R index 3c30f58..9201fbb 100644 --- a/R/clean_case_vax_history.R +++ b/R/clean_case_vax_history.R @@ -11,7 +11,7 @@ #' @export #' #' @examples -#' #' \dontrun{ +#' \dontrun{ #' url <- "https://MyGoDataServer.com/" #' username <- "myemail@email.com" #' password <- "mypassword" diff --git a/man/clean_case_vax_history.Rd b/man/clean_case_vax_history.Rd index 710d73d..5b3121e 100644 --- a/man/clean_case_vax_history.Rd +++ b/man/clean_case_vax_history.Rd @@ -20,7 +20,7 @@ Cleans and un-nests vaccination history, where vaccination is complete, from case data. Case data is returned from \code{\link[=get_cases]{get_cases()}}. } \examples{ -#' \dontrun{ +\dontrun{ url <- "https://MyGoDataServer.com/" username <- "myemail@email.com" password <- "mypassword" From 77c0665a5e6b8b24295e8769cec6b21eb88a5571 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Thu, 6 Apr 2023 17:36:53 +0100 Subject: [PATCH 193/203] used translation function in clean_contacts_of_contacts (function fails) --- R/clean_contacts_of_contacts.R | 47 ++++++++++++++++--------------- man/clean_contacts_of_contacts.Rd | 30 ++++++++++++++------ 2 files changed, 47 insertions(+), 30 deletions(-) diff --git a/R/clean_contacts_of_contacts.R b/R/clean_contacts_of_contacts.R index c9c6444..1c25de8 100644 --- a/R/clean_contacts_of_contacts.R +++ b/R/clean_contacts_of_contacts.R @@ -11,16 +11,18 @@ #' @param contacts_of_contacts_vacc_history_clean A `tibble` containing the #' cleaned vaccination history from contacts of contacts (data is cleaned by #' [`clean_contacts_of_contacts_vax_history()`]). +#' @param language_tokens A tibble of language tokens returned by +#' [`get_language_tokens()`] to translate the string tokens in the data. #' #' @return A `tibble` containing the cleaned contacts of contacts data. #' @export #' #' @examples #' \dontrun{ -#' url <- "https://MyGoDataServer.com/" -#' username <- "myemail@email.com" -#' password <- "mypassword" -#' outbreak_id <- "3b5554d7-2c19-41d0-b9af-475ad25a382b" +#' url <- "https://MyGoDataServer.com/" +#' username <- "myemail@email.com" +#' password <- "mypassword" +#' outbreak_id <- "3b5554d7-2c19-41d0-b9af-475ad25a382b" #' #' contacts_of_contacts <- get_contacts_of_contacts( #' url = url, @@ -37,24 +39,35 @@ #' #' locations_clean <- clean_locations(locations = locations) #' +#' language_tokens <- get_language_tokens( +#' url = url, +#' username = username, +#' password = password, +#' language = "english_us" +#' ) +#' #' contacts_of_contacts_address_history_clean <- clean_contacts_of_contacts_address_history( #' contacts_of_contacts = contacts_of_contacts, -#' locations_clean = locations_clean +#' locations_clean = locations_clean, +#' language_tokens = language_tokens #' ) #' #' contacts_of_contacts_vacc_history_clean <- clean_contacts_of_contacts_vax_history( -#' contacts_of_contacts = contacts_of_contacts +#' contacts_of_contacts = contacts_of_contacts, +#' language_tokens = language_tokens #' ) #' #' contacts_of_contacts_clean <- clean_contacts_of_contacts( #' contacts_of_contacts = contacts_of_contacts, #' contacts_of_contacts_address_history_clean = contacts_of_contacts_address_history_clean, -#' contacts_of_contacts_vacc_history_clean = contacts_of_contacts_vacc_history_clean +#' contacts_of_contacts_vacc_history_clean = contacts_of_contacts_vacc_history_clean, +#' language_tokens = language_tokens #' ) #' } clean_contacts_of_contacts <- function(contacts_of_contacts, contacts_of_contacts_address_history_clean, - contacts_of_contacts_vacc_history_clean) { + contacts_of_contacts_vacc_history_clean, + language_tokens) { # Remove all deleted records coc_clean <- dplyr::filter( @@ -103,24 +116,14 @@ clean_contacts_of_contacts <- function(contacts_of_contacts, ) # truncate responses of categorical vars so easier to read - coc_clean <- dplyr::mutate( - .data = coc_clean, - classification = sub(".*CLASSIFICATION_", "", classification), - gender = sub(".*GENDER_", "", gender), - occupation = sub(".*OCCUPATION_", "", occupation), - outcome = sub(".*OUTCOME_", "", outcome_id), - pregnancy_status = sub(".*STATUS_", "", pregnancy_status), - risk_level = sub(".*LEVEL_", "", risk_level), - relationship_certainty_level = sub(".*LEVEL_", "", relationship_certainty_level_id), - relationship_exposure_type = sub(".*TYPE_", "", relationship_exposure_type_id), - relationship_context_of_transmission = sub(".*TRANSMISSION_", "", relationship_social_relationship_type_id), - relationship_exposure_frequency = sub(".*FREQUENCY_", "", relationship_exposure_frequency_id), - relationship_exposure_duration = sub(".*DURATION_", "", relationship_exposure_duration_id) + coc_clean <- translate_categories( + data = coc_clean, + language_tokens = language_tokens ) contacts_of_contacts_address_history_clean <- dplyr::filter( .data = contacts_of_contacts_address_history_clean, - addresses_typeid == "USUAL_PLACE_OF_RESIDENCE" + addresses_typeid == "Current address" ) # join in current address from address history, only current place of diff --git a/man/clean_contacts_of_contacts.Rd b/man/clean_contacts_of_contacts.Rd index 2cada00..1b31a65 100644 --- a/man/clean_contacts_of_contacts.Rd +++ b/man/clean_contacts_of_contacts.Rd @@ -7,7 +7,8 @@ clean_contacts_of_contacts( contacts_of_contacts, contacts_of_contacts_address_history_clean, - contacts_of_contacts_vacc_history_clean + contacts_of_contacts_vacc_history_clean, + language_tokens ) } \arguments{ @@ -21,6 +22,9 @@ cleaned address history data from contacts of contacts (data is cleaned by \item{contacts_of_contacts_vacc_history_clean}{A \code{tibble} containing the cleaned vaccination history from contacts of contacts (data is cleaned by \code{\link[=clean_contacts_of_contacts_vax_history]{clean_contacts_of_contacts_vax_history()}}).} + +\item{language_tokens}{A tibble of language tokens returned by +\code{\link[=get_language_tokens]{get_language_tokens()}} to translate the string tokens in the data.} } \value{ A \code{tibble} containing the cleaned contacts of contacts data. @@ -31,10 +35,10 @@ contacts data is returned by \code{\link[=get_contacts_of_contacts]{get_contacts } \examples{ \dontrun{ - url <- "https://MyGoDataServer.com/" - username <- "myemail@email.com" - password <- "mypassword" - outbreak_id <- "3b5554d7-2c19-41d0-b9af-475ad25a382b" +url <- "https://MyGoDataServer.com/" +username <- "myemail@email.com" +password <- "mypassword" +outbreak_id <- "3b5554d7-2c19-41d0-b9af-475ad25a382b" contacts_of_contacts <- get_contacts_of_contacts( url = url, @@ -51,19 +55,29 @@ locations <- get_locations( locations_clean <- clean_locations(locations = locations) +language_tokens <- get_language_tokens( + url = url, + username = username, + password = password, + language = "english_us" +) + contacts_of_contacts_address_history_clean <- clean_contacts_of_contacts_address_history( contacts_of_contacts = contacts_of_contacts, - locations_clean = locations_clean + locations_clean = locations_clean, + language_tokens = language_tokens ) contacts_of_contacts_vacc_history_clean <- clean_contacts_of_contacts_vax_history( - contacts_of_contacts = contacts_of_contacts + contacts_of_contacts = contacts_of_contacts, + language_tokens = language_tokens ) contacts_of_contacts_clean <- clean_contacts_of_contacts( contacts_of_contacts = contacts_of_contacts, contacts_of_contacts_address_history_clean = contacts_of_contacts_address_history_clean, - contacts_of_contacts_vacc_history_clean = contacts_of_contacts_vacc_history_clean + contacts_of_contacts_vacc_history_clean = contacts_of_contacts_vacc_history_clean, + language_tokens = language_tokens ) } } From 3e4d5020b95854d092a40fba61b929e05b11d7cd Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Fri, 14 Apr 2023 11:48:03 +0100 Subject: [PATCH 194/203] added janitor as package dependency --- DESCRIPTION | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index ef8a27d..d45210b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -28,7 +28,8 @@ Imports: stringr, tibble, tidyr, - urltools + urltools, + janitor Suggests: devtools, testthat (>= 3.0.0) From dedc9dfb59520988b39ca665c61f269a41413bad Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Fri, 14 Apr 2023 12:20:14 +0100 Subject: [PATCH 195/203] used translation function in clean_contacts --- R/clean_contacts.R | 141 ++++++++++++++++++++++++++------------------- 1 file changed, 81 insertions(+), 60 deletions(-) diff --git a/R/clean_contacts.R b/R/clean_contacts.R index d2907d8..5e8bd37 100644 --- a/R/clean_contacts.R +++ b/R/clean_contacts.R @@ -19,57 +19,79 @@ #' #' @examples #' \dontrun{ -#' url <- "https://MyGoDataServer.com/" -#' username <- "myemail@email.com" -#' password <- "mypassword" -#' outbreak_id <- "3b5554d7-2c19-41d0-b9af-475ad25a382b" +#' url <- "https://MyGoDataServer.com/" +#' username <- "myemail@email.com" +#' password <- "mypassword" +#' outbreak_id <- "3b5554d7-2c19-41d0-b9af-475ad25a382b" #' -#' contacts <- get_contacts( -#' url = url, -#' username = username, -#' password = password, -#' outbreak_id = outbreak_id -#' ) +#' contacts <- get_contacts( +#' url = url, +#' username = username, +#' password = password, +#' outbreak_id = outbreak_id +#' ) #' -#' locations <- get_locations( -#' url = url, -#' username = username, -#' password = password -#' ) +#' locations <- get_locations( +#' url = url, +#' username = username, +#' password = password +#' ) #' -#' locations_clean <- clean_locations(locations = locations) +#' locations_clean <- clean_locations(locations = locations) #' -#' # other cleaned data required for `clean_contacts()` -#' contacts_vacc_history_clean <- clean_contact_vax_history(contacts = contacts) -#' contacts_address_history_clean <- clean_contact_address_history( -#' contacts = contacts, -#' locations_clean = locations_clean -#' ) +#' language_tokens <- get_language_tokens( +#' url = url, +#' username = username, +#' password = password, +#' language = "english_us" +#' ) #' -#' cases <- get_cases( -#' url = url, -#' username = username, -#' password = password, -#' outbreak_id = outbreak_id -#' ) -#' cases_address_history_clean <- clean_case_address_history(cases = cases) -#' cases_vacc_history_clean <- clean_case_vax_history(cases = cases) -#' cases_dateranges_history_clean <- clean_case_med_history(cases = cases) +#' # other cleaned data required for `clean_contacts()` +#' contacts_vacc_history_clean <- clean_contact_vax_history( +#' contacts = contacts, +#' language_tokens = language_tokens +#' ) +#' contacts_address_history_clean <- clean_contact_address_history( +#' contacts = contacts, +#' locations_clean = locations_clean, +#' language_tokens = language_tokens +#' ) #' -#' cases_clean <- clean_cases( -#' cases = cases, -#' cases_address_history_clean = cases_address_history_clean, -#' cases_vacc_history_clean = cases_vacc_history_clean, -#' cases_dateranges_history_clean = cases_dateranges_history_clean -#' ) -#' contacts_becoming_cases <- cases_from_contacts(cases_clean = cases_clean) +#' cases <- get_cases( +#' url = url, +#' username = username, +#' password = password, +#' outbreak_id = outbreak_id +#' ) +#' cases_address_history_clean <- clean_case_address_history( +#' cases = cases, +#' locations_clean = locations_clean, +#' language_tokens = language_tokens +#' ) +#' cases_vacc_history_clean <- clean_case_vax_history( +#' cases = cases, +#' language_tokens = language_tokens +#' ) +#' cases_dateranges_history_clean <- clean_case_med_history( +#' cases = cases, +#' language_tokens = language_tokens +#' ) #' -#' contacts_clean <- clean_contacts( -#' contacts = contacts, -#' contacts_address_history_clean = cases_address_history_clean, -#' contacts_vacc_history_clean = cases_vacc_history_clean, -#' contacts_becoming_cases = contacts_becoming_cases -#' ) +#' cases_clean <- clean_cases( +#' cases = cases, +#' cases_address_history_clean = cases_address_history_clean, +#' cases_vacc_history_clean = cases_vacc_history_clean, +#' cases_dateranges_history_clean = cases_dateranges_history_clean, +#' language_tokens = language_tokens +#' ) +#' contacts_becoming_cases <- cases_from_contacts(cases_clean = cases_clean) +#' +#' contacts_clean <- clean_contacts( +#' contacts = contacts, +#' contacts_address_history_clean = cases_address_history_clean, +#' contacts_vacc_history_clean = cases_vacc_history_clean, +#' contacts_becoming_cases = contacts_becoming_cases +#' ) #' } clean_contacts <- function(contacts, contacts_address_history_clean, @@ -145,26 +167,25 @@ clean_contacts <- function(contacts, datetime_created_at = as.POSIXct(datetime_created_at, format = "%Y-%m-%dT%H:%M") ) - # truncate responses of categorical vars so easier to read - contacts_clean <- dplyr::mutate( + # translate responses of categorical vars so easier to read + contacts_clean <- translate_categories( + data = contacts_clean, + language_tokens = language_tokens + ) + + contacts_clean <- dplyr::rename( .data = contacts_clean, - classification = sub(".*CLASSIFICATION_", "", classification), - gender = sub(".*GENDER_", "", gender), - occupation = sub(".*OCCUPATION_", "", occupation), - outcome = sub(".*OUTCOME_", "", outcome_id), - pregnancy_status = sub(".*STATUS_", "", pregnancy_status), - risk_level = sub(".*LEVEL_", "", risk_level), - follow_up_status = sub(".*TYPE_", "", follow_up_status), - relationship_certainty_level = sub(".*LEVEL_", "", relationship_certainty_level_id), - relationship_exposure_type = sub(".*TYPE_", "", relationship_exposure_type_id), - relationship_context_of_transmission = sub(".*TRANSMISSION_", "", relationship_social_relationship_type_id), - relationship_exposure_frequency = sub(".*FREQUENCY_", "", relationship_exposure_frequency_id), - relationship_exposure_duration = sub(".*DURATION_", "", relationship_exposure_duration_id) + outcome = "outcome_id", + relationship_certainty_level = "relationship_certainty_level_id", + relationship_exposure_type = "relationship_exposure_type_id", + relationship_context_of_transmission = "relationship_social_relationship_type_id", + relationship_exposure_frequency = "relationship_exposure_frequency_id", + relationship_exposure_duration = "relationship_exposure_duration_id" ) contacts_address_history_clean <- dplyr::filter( .data = contacts_address_history_clean, - addresses_typeid == "USUAL_PLACE_OF_RESIDENCE" + addresses_typeid == "Current address" ) # join in current address from address history, only current place of residence @@ -177,7 +198,7 @@ clean_contacts <- function(contacts, # join in info from vacc block contacts_clean <- dplyr::mutate( .data = contacts_clean, - vaccinated = case_when(id %in% contacts_vacc_history_clean$id[contacts_vacc_history_clean$vaccinesreceived_status == "VACCINATED"] ~ TRUE, TRUE ~ FALSE) + vaccinated = case_when(id %in% contacts_vacc_history_clean$id[contacts_vacc_history_clean$vaccinesreceived_status == "Vaccinated"] ~ TRUE, TRUE ~ FALSE) ) # force NA ages to appear as NA, not as 0 like sometimes occurs From dd93982af4d15fbc396be0c273dafbfdad0b03df Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Fri, 14 Apr 2023 12:20:28 +0100 Subject: [PATCH 196/203] added test for clean_contacts --- tests/testthat/test-clean_contacts.R | 104 +++++++++++++++++++++++++++ 1 file changed, 104 insertions(+) create mode 100644 tests/testthat/test-clean_contacts.R diff --git a/tests/testthat/test-clean_contacts.R b/tests/testthat/test-clean_contacts.R new file mode 100644 index 0000000..d3e1d5f --- /dev/null +++ b/tests/testthat/test-clean_contacts.R @@ -0,0 +1,104 @@ +test_that("clean_contacts works as expected", { + skip("get_contacts requires API call") + + contacts <- get_contacts( + url = url, + username = username, + password = password, + outbreak_id = outbreak_id + ) + + locations <- get_locations( + url = url, + username = username, + password = password + ) + + locations_clean <- clean_locations(locations = locations) + + language_tokens <- get_language_tokens( + url = url, + username = username, + password = password, + language = "english_us" + ) + + # other cleaned data required for `clean_contacts()` + contacts_vacc_history_clean <- clean_contact_vax_history( + contacts = contacts, + language_tokens = language_tokens + ) + contacts_address_history_clean <- clean_contact_address_history( + contacts = contacts, + locations_clean = locations_clean, + language_tokens = language_tokens + ) + + cases <- get_cases( + url = url, + username = username, + password = password, + outbreak_id = outbreak_id + ) + cases_address_history_clean <- clean_case_address_history( + cases = cases, + locations_clean = locations_clean, + language_tokens = language_tokens + ) + cases_vacc_history_clean <- clean_case_vax_history( + cases = cases, + language_tokens = language_tokens + ) + cases_dateranges_history_clean <- clean_case_med_history( + cases = cases, + language_tokens = language_tokens + ) + + cases_clean <- clean_cases( + cases = cases, + cases_address_history_clean = cases_address_history_clean, + cases_vacc_history_clean = cases_vacc_history_clean, + cases_dateranges_history_clean = cases_dateranges_history_clean, + language_tokens = language_tokens + ) + contacts_becoming_cases <- cases_from_contacts(cases_clean = cases_clean) + + res <- clean_contacts( + contacts = contacts, + contacts_address_history_clean = cases_address_history_clean, + contacts_vacc_history_clean = cases_vacc_history_clean, + contacts_becoming_cases = contacts_becoming_cases + ) + + expect_s3_class(res, "tbl_df") + expect_s3_class(res, "data.frame") + expect_identical(dim(res), c(19L, 48L)) + expect_true( + all(c( + "id", "visual_id", "classification", "follow_up_status", "first_name", + "middle_name", "last_name", "gender", "age", "age_class", "occupation", + "pregnancy_status", "date_of_reporting", "date_of_last_contact", + "date_of_burial", "date_of_follow_up_start", "date_of_follow_up_end", + "was_case", "risk_level", "risk_reason", "safe_burial", + "transfer_refused", "responsible_user_id", "follow_up_team_id", + "admin_0_name", "admin_1_name", "admin_2_name", "lat", "long", "address", + "postal_code", "city", "telephone", "email", "vaccinated", "outcome", + "date_of_outcome", "relationship_exposure_type", + "relationship_context_of_transmission", "relationship_exposure_duration", + "relationship_exposure_frequency", "relationship_certainty_level", + "relationship_cluster_id", "location_id", "created_by", + "datetime_created_at", "updated_by", "datetime_updated_at" + ) %in% colnames(res)) + ) + + expect_identical( + unname(sapply(res[1, ], function(x) class(x)[1])), + c( + rep("character", 8), "numeric", "factor", "character", "character", + rep("Date", 5), "logical", "character", "character", "logical", "logical", + rep("character", 5), "numeric", "numeric", rep("character", 5), "logical", + "character", "Date", rep("character", 5), "logical", "character", + "character", "POSIXct", "character", "POSIXct" + ) + ) +}) From 31734f813e3358333273aa5de67a5f616122112b Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Fri, 14 Apr 2023 13:58:27 +0100 Subject: [PATCH 197/203] updated clean_events function and test --- R/clean_events.R | 2 +- tests/testthat/test-clean_events.R | 33 ++++++++++++++++++++++-------- 2 files changed, 26 insertions(+), 9 deletions(-) diff --git a/R/clean_events.R b/R/clean_events.R index 7bd02fa..1288477 100644 --- a/R/clean_events.R +++ b/R/clean_events.R @@ -96,7 +96,7 @@ clean_events <- function(events, "date", # dates "date_of_reporting", # dates "description", - "responsible_user_id", # assigned contact tracer + "responsible_user", # assigned contact tracer matches("^admin_.*name$"), lat = "address_geo_location_lat", # address long = "address_geo_location_lng", # address diff --git a/tests/testthat/test-clean_events.R b/tests/testthat/test-clean_events.R index 90cd59c..96b71e4 100644 --- a/tests/testthat/test-clean_events.R +++ b/tests/testthat/test-clean_events.R @@ -1,28 +1,45 @@ test_that("clean_events works as expected", { skip("get_events requires API call") - res <- get_events( + events <- get_events( url = url, username = username, password = password, outbreak_id = outbreak_id ) - res <- clean_events(events = res) + + locations <- get_locations( + url = url, + username = username, + password = password + ) + locations_clean <- clean_locations(locations = locations) + + res <- clean_events( + events = events, + locations_clean = locations_clean + ) expect_s3_class(res, "tbl_df") expect_s3_class(res, "data.frame") - expect_identical(dim(res), c(3L, 15L)) + expect_identical(dim(res), c(3L, 21L)) expect_true( all(c( - "id", "name", "user_ids_1", "user_ids_2", "user_ids_3", "user_ids_4", - "user_ids_5", "location_ids_1", "location_ids_2", "location_ids_3", - "location_ids_4", "created_by", "datetime_created_at", "updated_by", + "id", "name", "date", "date_of_reporting", "description", + "responsible_user", "admin_0_name", "admin_1_name", "admin_2_name", "lat", + "long", "address", "postal_code", "city", "telephone", "email", + "location_id", "created_by", "datetime_created_at", "updated_by", "datetime_updated_at" ) %in% colnames(res)) ) expect_identical( - unname(sapply(res[1, ], class)), - rep("character", 15) + unname(sapply(res[1, ], function(x) class(x)[1])), + c( + "character", "character", "character", "Date", "logical", "logical", + "character", "character", "character", "numeric", "numeric", "logical", + "logical", "logical", "logical", "logical", "character", "character", + "POSIXct", "character", "POSIXct" + ) ) }) From 00762473e217367ddc8a40547ef4161967a0322d Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Fri, 14 Apr 2023 14:22:56 +0100 Subject: [PATCH 198/203] used translation function for clean_followups and updated test --- R/clean_followups.R | 29 ++++++++++++++++++++------- tests/testthat/test-clean_followups.R | 12 +++++++++-- 2 files changed, 32 insertions(+), 9 deletions(-) diff --git a/R/clean_followups.R b/R/clean_followups.R index 056d604..1be33d5 100644 --- a/R/clean_followups.R +++ b/R/clean_followups.R @@ -41,18 +41,28 @@ #' #' locations_clean <- clean_locations(locations = locations) #' +#' language_tokens <- get_language_tokens( +#' url = url, +#' username = username, +#' password = password, +#' language = "english_us" +#' ) +#' #' contacts_address_history_clean <- clean_contact_address_history( #' contacts = contacts, -#' locations_clean = locations_clean +#' locations_clean = locations_clean, +#' language_tokens = language_tokens #' ) #' #' followups_clean <- clean_followups( #' followups = followups, -#' contacts_address_history_clean = contacts_address_history_clean +#' contacts_address_history_clean = contacts_address_history_clean, +#' language_tokens = language_tokens #' ) #' } clean_followups <- function(followups, - contacts_address_history_clean) { + contacts_address_history_clean, + language_tokens) { # Remove all deleted records followups_clean <- dplyr::filter( @@ -105,15 +115,20 @@ clean_followups <- function(followups, datetime_created_at = as.POSIXct(datetime_created_at, format = "%Y-%m-%dT%H:%M") ) - # truncate responses of categorical vars so easier to read - followups_clean <- dplyr::mutate( + # translate responses of categorical vars so easier to read + followups_clean <- translate_categories( + data = followups_clean, + language_tokens = language_tokens + ) + + followups_clean <- dplyr::rename( .data = followups_clean, - followup_status = sub(".*TYPE_", "", status_id) + followup_status = "status_id" ) contacts_address_history_clean <- dplyr::filter( .data = contacts_address_history_clean, - addresses_typeid == "USUAL_PLACE_OF_RESIDENCE" + addresses_typeid == "Current address" ) followups_clean <- dplyr::left_join( diff --git a/tests/testthat/test-clean_followups.R b/tests/testthat/test-clean_followups.R index 630f7d4..85724d5 100644 --- a/tests/testthat/test-clean_followups.R +++ b/tests/testthat/test-clean_followups.R @@ -23,9 +23,17 @@ test_that("clean_followups works as expected", { locations_clean <- clean_locations(locations = locations) + language_tokens <- get_language_tokens( + url = url, + username = username, + password = password, + language = "english_us" + ) + contacts_address_history_clean <- clean_contact_address_history( contacts = contacts, - locations_clean = locations_clean + locations_clean = locations_clean, + language_tokens = language_tokens ) res <- clean_followups( @@ -35,7 +43,7 @@ test_that("clean_followups works as expected", { expect_s3_class(res, "tbl_df") expect_s3_class(res, "data.frame") - expect_identical(dim(res), c(49L, 24L)) + expect_identical(dim(res), c(172L, 24L)) expect_true( all(c( "id", "contact_id", "contact_visual_id", "date", "followup_number", From 2569f9f78008f7ff1e815adcf77a7f71827e0f8a Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Fri, 14 Apr 2023 14:30:29 +0100 Subject: [PATCH 199/203] used translation function in clean_locations and updated test --- R/clean_locations.R | 21 +++++++++++++++++---- tests/testthat/test-clean_locations.R | 12 ++++++++++-- 2 files changed, 27 insertions(+), 6 deletions(-) diff --git a/R/clean_locations.R b/R/clean_locations.R index 1c5afba..49bf1f3 100644 --- a/R/clean_locations.R +++ b/R/clean_locations.R @@ -21,9 +21,17 @@ #' password = password #' ) #' -#' clean_locations(locations = locations) +#' language_tokens <- get_language_tokens( +#' url = url, +#' username = username, +#' password = password, +#' language = "english_us" +#' ) +#' +#' clean_locations(locations = locations, language_tokens = language_tokens) #' } -clean_locations <- function(locations) { +clean_locations <- function(locations, + language_tokens) { # filter out delete and inactive (or NA) values clean_locations <- dplyr::filter( @@ -35,10 +43,15 @@ clean_locations <- function(locations) { .data$active == TRUE | is.na(.data$active) ) + clean_locations <- translate_categories( + data = clean_locations, + language_tokens = language_tokens + ) + # add admin-level column - clean_locations <- dplyr::mutate( + clean_locations <- dplyr::rename( .data = clean_locations, - admin_level = sub(".*LEVEL_", "", .data$geographicalLevelId) + admin_level = "geographicalLevelId" ) # select columns diff --git a/tests/testthat/test-clean_locations.R b/tests/testthat/test-clean_locations.R index 9e6648c..95b6824 100644 --- a/tests/testthat/test-clean_locations.R +++ b/tests/testthat/test-clean_locations.R @@ -1,12 +1,20 @@ test_that("clean_locations works as expected", { skip("get_locations requires API call") - res <- get_locations( + locations <- get_locations( url = url, username = username, password = password ) - res <- clean_locations(locations = res) + + language_tokens <- get_language_tokens( + url = url, + username = username, + password = password, + language = "english_us" + ) + + res <- clean_locations(locations = locations, language_tokens = language_tokens) expect_s3_class(res, "tbl_df") expect_s3_class(res, "data.frame") From ac142b39784010eedf03b3c8f28e050fd2f7b46b Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Fri, 14 Apr 2023 14:38:39 +0100 Subject: [PATCH 200/203] used translation function in clean_relationships and updated test --- R/clean_relationships.R | 24 ++++++++++++++++------- tests/testthat/test-clean_relationships.R | 15 ++++++++++++-- 2 files changed, 30 insertions(+), 9 deletions(-) diff --git a/R/clean_relationships.R b/R/clean_relationships.R index c0cc883..afbe7d5 100644 --- a/R/clean_relationships.R +++ b/R/clean_relationships.R @@ -23,9 +23,20 @@ #' outbreak_id = outbreak_id #' ) #' -#' clean_relationships <- clean_relationships(relationships) +#' language_tokens <- get_language_tokens( +#' url = url, +#' username = username, +#' password = password, +#' language = "english_us" +#' ) +#' +#' clean_relationships <- clean_relationships( +#' relationships, +#' language_tokens = language_tokens +#' ) #' } -clean_relationships <- function(relationships) { +clean_relationships <- function(relationships, + language_tokens) { # Remove all deleted records clean_relationships <- dplyr::filter( @@ -77,11 +88,10 @@ clean_relationships <- function(relationships) { ) ) - # truncate responses of categorical vars so easier to read - clean_relationships <- dplyr::mutate( - .data = clean_relationships, - source_person_type = sub(".*TYPE_", "", source_person_type), - target_person_type = sub(".*TYPE_", "", target_person_type) + # translate responses of categorical vars so easier to read + clean_relationships <- translate_categories( + data = clean_relationships, + language_tokens = language_tokens ) # organize order of vars, only bring in what we need, take away confusing vars diff --git a/tests/testthat/test-clean_relationships.R b/tests/testthat/test-clean_relationships.R index 6fff74c..3fcac36 100644 --- a/tests/testthat/test-clean_relationships.R +++ b/tests/testthat/test-clean_relationships.R @@ -7,11 +7,22 @@ test_that("clean_relationships works as expected", { password = password, outbreak_id = outbreak_id ) - res <- clean_relationships(relationships = relationships) + + language_tokens <- get_language_tokens( + url = url, + username = username, + password = password, + language = "english_us" + ) + + res <- clean_relationships( + relationships = relationships, + language_tokens = language_tokens + ) expect_s3_class(res, "tbl_df") expect_s3_class(res, "data.frame") - expect_identical(dim(res), c(28L, 11L)) + expect_identical(dim(res), c(29L, 11L)) expect_true( all(c( "id", "source_person_id", "source_person_visual_id", "target_person_id", From fdcb33ad48798c13128ac5bfe8de4c5bde015fe7 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Fri, 14 Apr 2023 16:00:19 +0100 Subject: [PATCH 201/203] updated clean_teams test --- tests/testthat/test-clean_teams.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-clean_teams.R b/tests/testthat/test-clean_teams.R index ff8a10f..5ca4029 100644 --- a/tests/testthat/test-clean_teams.R +++ b/tests/testthat/test-clean_teams.R @@ -6,7 +6,7 @@ test_that("clean_teams works as expected", { expect_s3_class(res, "tbl_df") expect_s3_class(res, "data.frame") - expect_identical(dim(res), c(4L, 15L)) + expect_identical(dim(res), c(5L, 15L)) expect_true( all(c( "id", "name", "user_ids_1", "user_ids_2", "user_ids_3", "user_ids_4", From 5bb71f6bac8d1eb8e12e855a6772103c0260a698 Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Fri, 14 Apr 2023 16:05:44 +0100 Subject: [PATCH 202/203] used translation function in clean_users and updated test --- R/clean_users.R | 20 ++++++++++++++------ tests/testthat/test-clean_users.R | 10 +++++++++- 2 files changed, 23 insertions(+), 7 deletions(-) diff --git a/R/clean_users.R b/R/clean_users.R index cca13d3..7bc15c9 100644 --- a/R/clean_users.R +++ b/R/clean_users.R @@ -21,9 +21,17 @@ #' password = password #' ) #' -#' clean_users <- clean_users(users) +#' language_tokens <- get_language_tokens( +#' url = url, +#' username = username, +#' password = password, +#' language = "english_us" +#' ) +#' +#' clean_users <- clean_users(users = users, language_tokens = language_tokens) #' } -clean_users <- function(users) { +clean_users <- function(users, + language_tokens) { # standardize column name syntax clean_users <- janitor::clean_names(users) @@ -43,10 +51,10 @@ clean_users <- function(users) { clean_users <- tidyr::unnest_wider(clean_users, "role_ids", names_sep = "_") - # truncate responses of categorical vars so easier to read - clean_users <- dplyr::mutate( - clean_users, - institution_name = sub(".*NAME_", "", .data$institution_name) + # translate responses of categorical vars so easier to read + clean_users <- translate_categories( + data = clean_users, + language_tokens = language_tokens ) # organize order of vars, only bring in what we need, take away confusing vars diff --git a/tests/testthat/test-clean_users.R b/tests/testthat/test-clean_users.R index 876963a..338b16c 100644 --- a/tests/testthat/test-clean_users.R +++ b/tests/testthat/test-clean_users.R @@ -2,7 +2,15 @@ test_that("clean_users works as expected", { skip("get_users requires API call") users <- get_users(url = url, username = username, password = password) - res <- clean_users(users = users) + + language_tokens <- get_language_tokens( + url = url, + username = username, + password = password, + language = "english_us" + ) + + res <- clean_users(users = users, language_tokens = language_tokens) expect_s3_class(res, "tbl_df") expect_s3_class(res, "data.frame") From f6daa6f47397d84087c200bbd8bf3bcc28ed72bd Mon Sep 17 00:00:00 2001 From: Joshua Lambert Date: Fri, 14 Apr 2023 16:06:34 +0100 Subject: [PATCH 203/203] updated examples in documentation --- man/clean_contacts.Rd | 110 ++++++++++++++++++++++--------------- man/clean_followups.Rd | 15 ++++- man/clean_locations.Rd | 11 +++- man/clean_relationships.Rd | 14 ++++- man/clean_users.Rd | 11 +++- 5 files changed, 108 insertions(+), 53 deletions(-) diff --git a/man/clean_contacts.Rd b/man/clean_contacts.Rd index 02de61c..a5d5f90 100644 --- a/man/clean_contacts.Rd +++ b/man/clean_contacts.Rd @@ -35,56 +35,78 @@ Cleans and un-nests contact data. Contact data is returned by } \examples{ \dontrun{ - url <- "https://MyGoDataServer.com/" - username <- "myemail@email.com" - password <- "mypassword" - outbreak_id <- "3b5554d7-2c19-41d0-b9af-475ad25a382b" +url <- "https://MyGoDataServer.com/" +username <- "myemail@email.com" +password <- "mypassword" +outbreak_id <- "3b5554d7-2c19-41d0-b9af-475ad25a382b" - contacts <- get_contacts( - url = url, - username = username, - password = password, - outbreak_id = outbreak_id - ) +contacts <- get_contacts( + url = url, + username = username, + password = password, + outbreak_id = outbreak_id +) + +locations <- get_locations( + url = url, + username = username, + password = password +) - locations <- get_locations( - url = url, - username = username, - password = password - ) +locations_clean <- clean_locations(locations = locations) - locations_clean <- clean_locations(locations = locations) +language_tokens <- get_language_tokens( + url = url, + username = username, + password = password, + language = "english_us" +) - # other cleaned data required for `clean_contacts()` - contacts_vacc_history_clean <- clean_contact_vax_history(contacts = contacts) - contacts_address_history_clean <- clean_contact_address_history( - contacts = contacts, - locations_clean = locations_clean - ) +# other cleaned data required for `clean_contacts()` +contacts_vacc_history_clean <- clean_contact_vax_history( + contacts = contacts, + language_tokens = language_tokens +) +contacts_address_history_clean <- clean_contact_address_history( + contacts = contacts, + locations_clean = locations_clean, + language_tokens = language_tokens +) - cases <- get_cases( - url = url, - username = username, - password = password, - outbreak_id = outbreak_id - ) - cases_address_history_clean <- clean_case_address_history(cases = cases) - cases_vacc_history_clean <- clean_case_vax_history(cases = cases) - cases_dateranges_history_clean <- clean_case_med_history(cases = cases) +cases <- get_cases( + url = url, + username = username, + password = password, + outbreak_id = outbreak_id +) +cases_address_history_clean <- clean_case_address_history( + cases = cases, + locations_clean = locations_clean, + language_tokens = language_tokens +) +cases_vacc_history_clean <- clean_case_vax_history( + cases = cases, + language_tokens = language_tokens +) +cases_dateranges_history_clean <- clean_case_med_history( + cases = cases, + language_tokens = language_tokens +) - cases_clean <- clean_cases( - cases = cases, - cases_address_history_clean = cases_address_history_clean, - cases_vacc_history_clean = cases_vacc_history_clean, - cases_dateranges_history_clean = cases_dateranges_history_clean - ) - contacts_becoming_cases <- cases_from_contacts(cases_clean = cases_clean) +cases_clean <- clean_cases( + cases = cases, + cases_address_history_clean = cases_address_history_clean, + cases_vacc_history_clean = cases_vacc_history_clean, + cases_dateranges_history_clean = cases_dateranges_history_clean, + language_tokens = language_tokens +) +contacts_becoming_cases <- cases_from_contacts(cases_clean = cases_clean) - contacts_clean <- clean_contacts( - contacts = contacts, - contacts_address_history_clean = cases_address_history_clean, - contacts_vacc_history_clean = cases_vacc_history_clean, - contacts_becoming_cases = contacts_becoming_cases - ) +contacts_clean <- clean_contacts( + contacts = contacts, + contacts_address_history_clean = cases_address_history_clean, + contacts_vacc_history_clean = cases_vacc_history_clean, + contacts_becoming_cases = contacts_becoming_cases +) } } diff --git a/man/clean_followups.Rd b/man/clean_followups.Rd index 074c21d..a4d3745 100644 --- a/man/clean_followups.Rd +++ b/man/clean_followups.Rd @@ -4,7 +4,7 @@ \alias{clean_followups} \title{Clean followup data} \usage{ -clean_followups(followups, contacts_address_history_clean) +clean_followups(followups, contacts_address_history_clean, language_tokens) } \arguments{ \item{followups}{A \code{tibble} with events data. Followup data is returned by @@ -50,14 +50,23 @@ locations <- get_locations( locations_clean <- clean_locations(locations = locations) +language_tokens <- get_language_tokens( + url = url, + username = username, + password = password, + language = "english_us" +) + contacts_address_history_clean <- clean_contact_address_history( contacts = contacts, - locations_clean = locations_clean + locations_clean = locations_clean, + language_tokens = language_tokens ) followups_clean <- clean_followups( followups = followups, - contacts_address_history_clean = contacts_address_history_clean + contacts_address_history_clean = contacts_address_history_clean, + language_tokens = language_tokens ) } } diff --git a/man/clean_locations.Rd b/man/clean_locations.Rd index 0e2578b..3752796 100644 --- a/man/clean_locations.Rd +++ b/man/clean_locations.Rd @@ -4,7 +4,7 @@ \alias{clean_locations} \title{Cleans location data} \usage{ -clean_locations(locations) +clean_locations(locations, language_tokens) } \arguments{ \item{locations}{A \code{\link{tibble}} containing locations data. This is the data @@ -29,6 +29,13 @@ locations <- get_locations( password = password ) -clean_locations(locations = locations) +language_tokens <- get_language_tokens( + url = url, + username = username, + password = password, + language = "english_us" +) + +clean_locations(locations = locations, language_tokens = language_tokens) } } diff --git a/man/clean_relationships.Rd b/man/clean_relationships.Rd index e834893..cedab1a 100644 --- a/man/clean_relationships.Rd +++ b/man/clean_relationships.Rd @@ -4,7 +4,7 @@ \alias{clean_relationships} \title{Cleans relationship data} \usage{ -clean_relationships(relationships) +clean_relationships(relationships, language_tokens) } \arguments{ \item{relationships}{A \code{tibble} of relationship data. Relationship data is @@ -31,6 +31,16 @@ relationships <- get_relationships( outbreak_id = outbreak_id ) -clean_relationships <- clean_relationships(relationships) +language_tokens <- get_language_tokens( + url = url, + username = username, + password = password, + language = "english_us" +) + +clean_relationships <- clean_relationships( + relationships, + language_tokens = language_tokens +) } } diff --git a/man/clean_users.Rd b/man/clean_users.Rd index d06731b..8c9d11f 100644 --- a/man/clean_users.Rd +++ b/man/clean_users.Rd @@ -4,7 +4,7 @@ \alias{clean_users} \title{Cleans users data} \usage{ -clean_users(users) +clean_users(users, language_tokens) } \arguments{ \item{users}{A \code{tibble} containing users data. Users data is returned by @@ -29,6 +29,13 @@ users <- get_users( password = password ) -clean_users <- clean_users(users) +language_tokens <- get_language_tokens( + url = url, + username = username, + password = password, + language = "english_us" +) + +clean_users <- clean_users(users = users, language_tokens = language_tokens) } }