insightsengineering · imazubi · Oct 26, 2021 · Sep 1, 2021 · Sep 1, 2021 · Sep 9, 2021
diff --git a/R/abnormal_by_worst_grade.R b/R/abnormal_by_worst_grade.R
@@ -1,100 +1,142 @@
 #' Patient Counts with the Most Extreme Post-baseline Toxicity Grade per Direction of Abnormality
 #'
-#' Primary analysis variable `.var` indicates the toxicity grade (numeric), and additional
-#'   analysis variables are `id` (character or factor) and `worst_grade_flag` (logical). For each
-#'   direction specified in `abnormal` (e.g. high or low) we condition on the worst grade flag and count
-#'   patients in the denominator as number of patients with at least one valid measurement during treatment,
-#'   and patients in the numerator as follows:
-#' * `1` to `4`: Numerator is number of patients with worst grades 1-4 respectively;
-#' * `Any`: Numerator is number of patients with at least one abnormality, which means grade is different from 0.
+#' Primary analysis variable `.var` indicates the toxicity grade (factor), and additional
+#' analysis variables are `id` (character or factor), `param` (`factor`) and `grade_dir` (`factor`).
+#' The pre-processing steps are crucial when using this function.
+#' For a certain direction (e.g. high or low) this function counts
+#' patients in the denominator as number of patients with at least one valid measurement during treatment,
+#' and patients in the numerator as follows:
+#'  * `1` to `4`: Numerator is number of patients with worst grades 1-4 respectively;
+#'  * `Any`: Numerator is number of patients with at least one abnormality, which means grade is different from 0.
 #'
-#' @details Note that `df` should be filtered to include only post-baseline records.
+#' @details
+#' The pre-processing steps are crucial when using this function. From the standard
+#' lab grade variable `ATOXGR`, derive the following two variables:
+#' * A grade direction variable (e.g. `GRADE_DIR`) is required in order to obtain
+#'   the correct denominators when building the layout as it is used to define row splitting.
+#' * A toxicity grade variable (e.g. `GRADE_ANL`) where all negative values from
+#'   `ATOXGR` are replaced by their absolute values.
+#' * Prior to tabulation, `df` must filtered to include only post-baseline records with worst grade flags.
 #'
 #' @inheritParams argument_convention
-#' @param abnormal (`character`)\cr identifying the abnormality direction.
 #'
 #' @name abnormal_by_worst_grade
 #'
 NULL
 
-#' @describeIn abnormal_by_worst_grade Statistics function which counts patients with worst grade
-#'   for a single `abnormal` level, consisting of counts and percentages of patients with worst grade
+#' @describeIn abnormal_by_worst_grade Statistics function which counts patients with worst grade,
+#'   consisting of counts and percentages of patients with worst grade
 #'   `1` to `4`, and `Any` non-zero grade.
 #' @return [s_count_abnormal_by_worst_grade()] the single statistic `count_fraction` with grade 1 to 4
 #'   and "Any" results.
 #'
-#' @importFrom stats setNames
-#'
 #' @export
 #'
 #' @examples
 #' library(scda)
 #' library(dplyr)
+#' library(forcats)
 #'
 #' adlb <- synthetic_cdisc_data("latest")$adlb
+#'
+#' # Data is modified in order to have some parameters with grades only in one direction
+#' # and simulate the real data.
+#' adlb$ATOXGR[adlb$PARAMCD == "ALT" & adlb$ATOXGR %in% c("1", "2", "3", "4")] <- "-1"
+#' adlb$ANRIND[adlb$PARAMCD == "ALT" & adlb$ANRIND == "HIGH"] <- "LOW"
+#' adlb$WGRHIFL[adlb$PARAMCD == "ALT"] <- ""
+#'
+#' adlb$ATOXGR[adlb$PARAMCD == "IGA" & adlb$ATOXGR %in% c("-1", "-2", "-3", "-4")] <- "1"
+#' adlb$ANRIND[adlb$PARAMCD == "IGA" & adlb$ANRIND == "LOW"] <- "HIGH"
+#' adlb$WGRLOFL[adlb$PARAMCD == "IGA"] <- ""
+#'
+#' # Here starts the real pre-processing.
 #' adlb_f <- adlb %>%
 #'   filter(!AVISIT %in% c("SCREENING", "BASELINE")) %>%
 #'   mutate(
-#'     ATOXGR = as.numeric(as.character(ATOXGR)),
-#'     WGRLOFL = case_when(WGRLOFL == "Y" ~ TRUE, TRUE ~ FALSE),
-#'     WGRHIFL = case_when(WGRHIFL == "Y" ~ TRUE, TRUE ~ FALSE)
-#'   )
+#'     GRADE_DIR = factor(case_when(
+#'      ATOXGR %in% c("-1", "-2", "-3", "-4") ~ "LOW",
+#'      ATOXGR == "0" ~ "ZERO",
+#'      ATOXGR %in% c("1", "2", "3", "4") ~ "HIGH"),
+#'      levels = c("LOW", "ZERO", "HIGH")),
+#'     GRADE_ANL = fct_relevel(
+#'      fct_recode(ATOXGR,
+#'       `1` = "-1", `2` = "-2", `3` = "-3", `4` = "-4"),
+#'        c("0", "1", "2", "3", "4")
+#'      )
+#'    ) %>%
+#'   filter(WGRLOFL == "Y" | WGRHIFL == "Y") %>%
+#'   droplevels()
 #'
-#' s_count_abnormal_by_worst_grade(
-#'   df = adlb_f %>% filter(ARMCD == "ARM A" & PARAMCD == "CRP"),
-#'   .var = "ATOXGR",
-#'   abnormal = "low",
-#'   variables = list(id = "USUBJID", worst_grade_flag = "WGRLOFL")
+#' adlb_f_alt <- adlb_f %>% filter(PARAMCD == "ALT") %>% droplevels()
+#' full_parent_df <- list(adlb_f_alt, "not_needed")
+#' cur_col_subset <- list(rep(TRUE, nrow(adlb_f_alt)), "not_needed")
+#'
+#' # This mimics a split structure on PARAM and GRADE_DIR for a total column
+#' spl_context <- data.frame(
+#'   split = c("PARAM", "GRADE_DIR"),
+#'   full_parent_df = I(full_parent_df),
+#'   cur_col_subset = I(cur_col_subset)
 #' )
+#'
 #' s_count_abnormal_by_worst_grade(
-#'   df = adlb_f %>% filter(ARMCD == "ARM A" & PARAMCD == "CRP"),
-#'   .var = "ATOXGR",
-#'   abnormal = "high",
-#'   variables = list(id = "USUBJID", worst_grade_flag = "WGRHIFL")
+#'   df = adlb_f_alt,
+#'   .spl_context = spl_context,
+#'   .var = "GRADE_ANL"
 #' )
 #'
-s_count_abnormal_by_worst_grade <- function(df, #nolint
-                                            .var = "ATOXGR",
-                                            abnormal = c("low", "high"),
-                                            variables = list(id = "USUBJID", worst_grade_flag = "WGRLOFL")) {
-  abnormal <- match.arg(abnormal)
+s_count_abnormal_by_worst_grade <- function(df = adlb_f, #nolint
+                                            .var = "GRADE_ANL",
+                                            .spl_context,
+                                            variables = list(
+                                              id = "USUBJID",
+                                              param  = "PARAM",
+                                              grade_dir = "GRADE_DIR"
+                                            )) {
+
   assert_that(
     is.string(.var),
-    is.string(abnormal),
-    is.list(variables),
-    all(names(variables) %in% c("id", "worst_grade_flag")),
-    is_df_with_variables(df, c(aval = .var, variables)),
-    is_numeric_vector(df[[.var]]),
-    is_logical_vector(df[[variables$worst_grade_flag]]),
-    is_character_or_factor(df[[variables$id]])
+    is_df_with_variables(df, c(a = .var, variables)),
+    is_valid_factor(df[[.var]]),
+    is_character_or_factor(df[[variables$id]]),
+    is_valid_factor(df[[variables$param]]),
+    is_valid_factor(df[[variables$grade_dir]])
   )
-
-  df <- df[!is.na(df[[.var]]), ]
-  anl <- data.frame(
-    id = df[[variables$id]],
-    grade = df[[.var]],
-    flag = df[[variables$worst_grade_flag]],
-    stringsAsFactors = FALSE
+  # To verify that the `split_rows_by` are performed with correct variables.
+  assert_that(
+    all(
+      c(variables[["param"]], variables[["grade_dir"]]) %in% .spl_context$split), #nolint
+    msg = paste(
+      "variabes$param and variables$grade_dir must match",
+      "the variables used for splitting rows in the layout."
+    )
   )
-  # Denominator is number of patients with at least one valid measurement during treatment
-  n <- length(unique(anl$id))
-  # Numerator is number of patients with worst high grade (grade 1 to 4) or low grade (grade -1 to -4)
-  if (abnormal == "low") {
-    anl_abn <- anl[anl$flag & anl$grade < 0, , drop = FALSE]
-    grades <- setNames(- (1:4), as.character(1:4))
-  } else if (abnormal == "high") {
-    anl_abn <- anl[anl$flag & anl$grade > 0, , drop = FALSE]
-    grades <- setNames(1:4, as.character(1:4))
-  }
+  first_row <- .spl_context[.spl_context$split == variables[["param"]], ] #nolint
+  x_lvls <- c(setdiff(levels(df[[.var]]), "0"), "Any")
+  result <- split(numeric(0), factor(x_lvls))
+
+  subj <- first_row$full_parent_df[[1]][[variables[["id"]]]]
+  subj_cur_col <- subj[first_row$cur_col_subset[[1]]]
+  # Some subjects may have a record for high and low directions but
+  # should be counted only once.
+  denom <- length(unique(subj_cur_col))
+
+  for (lvl in x_lvls) {
+    if (lvl != "Any") {
+
+      num <- sum(df[[.var]] == lvl)
+      fraction <- ifelse(denom == 0, 0, num / denom)
 
-  by_grade <- lapply(grades, function(i) {
-    num <- length(unique(anl_abn[anl_abn$grade == i, "id", drop = TRUE]))
-    c(num, num / n)
-  })
-  # Numerator for "Any" grade is number of patients with at least one high/low abnormality
-  any_grade_num <- length(unique(anl_abn$id))
+    } else {
 
-  list(count_fraction = c(by_grade, list("Any" = c(any_grade_num, any_grade_num / n))))
+      num <- sum(df[[.var]] != 0)
+      fraction <- ifelse(denom == 0, 0, num / denom)
+
+    }
+    result[[lvl]] <- with_label(c(count = num, fraction = fraction), lvl)
+  }
+
+  result <- list(count_fraction = result)
+  result
 }
 
 #' @describeIn abnormal_by_worst_grade Formatted Analysis function which can be further customized by calling
@@ -105,12 +147,7 @@ s_count_abnormal_by_worst_grade <- function(df, #nolint
 #' # Use the Formatted Analysis function for `analyze()`. We need to ungroup `count_fraction` first
 #' # so that the rtables formatting function `format_count_fraction()` can be applied correctly.
 #' afun <- make_afun(a_count_abnormal_by_worst_grade, .ungroup_stats = "count_fraction")
-#' afun(
-#'   df = adlb_f %>% filter(ARMCD == "ARM A" & PARAMCD == "CRP"),
-#'   .var = "ATOXGR",
-#'   abnormal = "high",
-#'   variables = list(id = "USUBJID", worst_grade_flag = "WGRHIFL")
-#' )
+#' afun(df = adlb_f_alt, .spl_context = spl_context)
 #'
 a_count_abnormal_by_worst_grade <- make_afun(  #nolint
   s_count_abnormal_by_worst_grade,
@@ -122,40 +159,32 @@ a_count_abnormal_by_worst_grade <- make_afun(  #nolint
 #' @export
 #' @examples
 #'
-#' basic_table() %>%
-#'   count_abnormal_by_worst_grade(
-#'     var = "ATOXGR",
-#'     abnormal = c(Low = "low", High = "high"),
-#'     variables = list(id = "USUBJID", worst_grade_flag = c(Low = "WGRLOFL", High = "WGRHIFL"))
-#'   ) %>%
-#'   build_table(df = adlb_f %>% filter(ARMCD == "ARM A" & PARAMCD == "CRP"))
+#' # Map excludes records without abnormal grade since they should not be displayed
+#' # in the table.
+#' map <- unique(adlb_f[adlb_f$GRADE_DIR != "ZERO", c("PARAM", "GRADE_DIR", "GRADE_ANL")]) %>%
+#'  lapply(as.character) %>%
+#'  as.data.frame() %>%
+#'  arrange(PARAM, desc(GRADE_DIR), GRADE_ANL)
 #'
 #' basic_table() %>%
 #'   split_cols_by("ARMCD") %>%
-#'   split_rows_by("PARAMCD") %>%
+#'   split_rows_by("PARAM") %>%
+#'   split_rows_by("GRADE_DIR", split_fun = trim_levels_to_map(map)) %>%
 #'   count_abnormal_by_worst_grade(
-#'     var = "ATOXGR",
-#'     abnormal = c(Low = "low", High = "high"),
-#'     variables = list(id = "USUBJID", worst_grade_flag = c(Low = "WGRLOFL", High = "WGRHIFL"))
+#'     var = "GRADE_ANL",
+#'     variables = list(id = "USUBJID", param  = "PARAM", grade_dir = "GRADE_DIR")
 #'   ) %>%
 #'   build_table(df = adlb_f)
 #'
+#'
 count_abnormal_by_worst_grade <- function(lyt,
                                           var,
-                                          abnormal,
-                                          variables,
                                           ...,
-                                          table_names = abnormal,
                                           .stats = NULL,
                                           .formats = NULL,
                                           .labels = NULL,
                                           .indent_mods = NULL) {
-  assert_that(
-    is.string(var),
-    !is.null(names(abnormal)),
-    setequal(names(abnormal), names(variables$worst_grade_flag)),
-    is_equal_length(abnormal, table_names)
-  )
+
   afun <- make_afun(
     a_count_abnormal_by_worst_grade,
     .stats = .stats,
@@ -164,19 +193,11 @@ count_abnormal_by_worst_grade <- function(lyt,
     .indent_mods = .indent_mods,
     .ungroup_stats = "count_fraction"
   )
-  for (i in seq_along(abnormal)) {
-    abn <- abnormal[i]
-    varlist <- variables
-    varlist$worst_grade_flag <- varlist$worst_grade_flag[names(abn)]
-    lyt <- analyze(
-      lyt = lyt,
-      vars = var,
-      var_labels = names(abn),
-      table_names = table_names[i],
-      afun = afun,
-      extra_args = c(list(abnormal = abn, variables = varlist), list(...)),
-      show_labels = "visible"
-    )
-  }
-  lyt
+  analyze(
+    lyt = lyt,
+    vars = var,
+    afun = afun,
+    extra_args = list(...),
+    show_labels = "hidden"
+  )
 }
diff --git a/R/argument_convention.R b/R/argument_convention.R
@@ -23,6 +23,8 @@
 #' @param .labels (named `character`)\cr labels for the statistics (without indent).
 #' @param .var (`string`)\cr single variable name that is passed by `rtables` when requested
 #'   by a statistics function.
+#' @param .spl_context (`data frame`)\cr gives information about ancestor split states
+#'   that is passed by `rtables`.
 #' @param col_by (`factor`)\cr defining column groups.
 #' @param conf_level (`proportion`)\cr confidence level of the interval.
 #' @param data (`data.frame`)\cr the dataset containing the variables to summarize.