Skip to content

Commit

Permalink
time scale and disease hierarchy harmonization tools
Browse files Browse the repository at this point in the history
  • Loading branch information
stevencarlislewalker committed Mar 28, 2024
1 parent a737996 commit 38fcd55
Show file tree
Hide file tree
Showing 6 changed files with 123 additions and 14 deletions.
1 change: 1 addition & 0 deletions R/iidda/NAMESPACE
Expand Up @@ -30,6 +30,7 @@ export(extract_char_or_blank)
export(extract_or_blank)
export(fill_and_wrap)
export(fill_re_template)
export(filter_out_time_scales)
export(fix_csv)
export(flatten_disease_hierarchy)
export(freq_to_by)
Expand Down
74 changes: 68 additions & 6 deletions R/iidda/R/data_prep_tools.R
Expand Up @@ -669,32 +669,39 @@ is_leaf_disease = function(disease, nesting_disease) !disease %in% unique(nestin
#' Flatten Disease Hierarchy
#'
#' Take a tidy data set with a potentially complex disease hierarchy
#' and flatten this hierarchy so that, at any particular time and location,
#' all diseases in the `disease` column have the same `nesting_disease`.
#' and flatten this hierarchy so that, at any particular time and location
#' (or some other context), all diseases in the `disease` column have the
#' same `nesting_disease`.
#'
#' @param data A tidy data set with the following minimal set of columns:
#' `disease`, `nesting_disease`, `period_start_date`, `period_end_date`,
#' and `location` (TODO: generalized so that the last three are
#' configurable).
#' and `location`. Note that the latter three can be modified with
#' `grouping_columns`.
#' @param disease_lookup A lookup table with `disease` and `nesting_disease`
#' columns that describe a global disease hierarchy that will be applied
#' locally to flatten disease hierarchy at each point in time and space
#' in the tidy data set in the `data` argument.
#' @param grouping_columns Character vector of column names to use when
#' grouping to determine the context.
#'
#' @export
flatten_disease_hierarchy = function(data, disease_lookup) {
flatten_disease_hierarchy = function(data
, disease_lookup
, grouping_columns = c("period_start_date", "period_end_date", "location")
) {
disease_lookup =
(disease_lookup
|> select(disease, nesting_disease)
|> distinct())
(data

# getting basal disease for all diseases
|> rowwise()
|> mutate(basal_disease = basal_disease(disease, disease_lookup))
|> ungroup()

# keeping only leaf diseases
|> group_by(period_start_date, period_end_date, location, basal_disease)
|> group_by(across(c("basal_disease", all_of(grouping_columns)))) # period_start_date, period_end_date, location, basal_disease)
|> filter(is_leaf_disease(disease, nesting_disease))
|> ungroup()

Expand All @@ -704,3 +711,58 @@ flatten_disease_hierarchy = function(data, disease_lookup) {
|> select(-basal_disease)
)
}


time_scale_chooser = function(time_scale, which_fun) {
time_scale_order = c("wk", "2wk", "mo", "qr", "yr")
time_scale_factor = factor(
as.character(time_scale)
, levels = time_scale_order
)
rr = time_scale[which_fun(as.numeric(time_scale_factor))]
if (length(rr) == 0) browser()
rr
}

#' Filter out Time Scales
#'
#' Choose a single best `time_scale` for each year in a dataset, grouped by
#' nesting disease. This best `time_scale` is defined as the longest
#' of the shortest time scales in each location and sub-disease.
#'
#' @param data A tidy data set with a `time_scale` column.
#' @param initial_group Character vector naming columns for defining
#' the initial grouping used to compute the shortest time scales.
#' @param final_group Character vector naming columns for defining the final
#' grouping used to compute the longest of the shortest time scales.
#' @param cleanup Should intermediate columns be cleaned up?
#'
#' @importFrom lubridate year
#' @export
filter_out_time_scales = function(data
, initial_group = c("iso_3166", "iso_3166_2", "disease", "nesting_disease")
, final_group = c("nesting_disease")
, cleanup = TRUE
) {
time_scale_map = c(wk = "wk", yr = "yr", mo = "mo", `2wk` = "2wk", mt = "mo", `two-wks` = "2wk", qrtr = "qr")
data$time_scale = time_scale_map[as.character(data$time_scale)]
if (length(unique(data$time_scale)) == 1L) return(data)
# mutate(longest_time_scale = time_scale_chooser(time_scale, which.max))
new_data = (data
|> mutate(year = year(period_end_date))
|> group_by(across(all_of(c("year", initial_group))))
|> mutate(shortest_time_scale = time_scale_chooser(time_scale, which.min))
|> ungroup()
|> group_by(across(all_of(c("year", final_group))))
|> mutate(best_time_scale = time_scale_chooser(shortest_time_scale, which.max))
|> ungroup()
|> filter(time_scale == best_time_scale)
)
if (isTRUE(cleanup)) {
new_data = select(new_data
, -year, -shortest_time_scale, -best_time_scale
)
}
new_data
}

8 changes: 6 additions & 2 deletions R/iidda/R/repo_paths.R
Expand Up @@ -21,12 +21,16 @@
#' exists in \code{exploration_project_path}, an error is returned.
#' @param pipeline_repo_root Path to the folder of a cloned IIDDA pipeline
#' repository.
#' @param ... Additional arguments to pass to \code{\link{file.copy}}. A
#' useful argument here is `overwrite`, which indicates whether an existing
#' exploration script should be overwritten.
#'
#' @export
pipeline_exploration_starter = function(
script_filename,
exploration_project_path,
pipeline_repo_root = getwd()
pipeline_repo_root = getwd(),
...
) {
if (!dir.exists(exploration_project_path)) {
dir.create(exploration_project_path, recursive = TRUE)
Expand All @@ -37,7 +41,7 @@ pipeline_exploration_starter = function(
)
path = assert_path_does_not_exist(add_root(script_path, pipeline_repo_root))
template = system.file("pipeline_exploration_starter.R", package = "iidda")
file.copy(template, path)
file.copy(template, path, ...)
}

#' Set Extension
Expand Down
29 changes: 29 additions & 0 deletions R/iidda/man/filter_out_time_scales.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

18 changes: 13 additions & 5 deletions R/iidda/man/flatten_disease_hierarchy.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 6 additions & 1 deletion R/iidda/man/pipeline_exploration_starter.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 38fcd55

Please sign in to comment.