version 0.4.4

cran · May 22, 2020 · 36acd2e · 36acd2e
1 parent 65f4c1c
commit 36acd2e
Show file tree

Hide file tree

Showing 26 changed files with 711 additions and 236 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: suddengains
 Title: Identify Sudden Gains in Longitudinal Data
-Date: 2020-03-14
-Version: 0.4.3
+Date: 2020-05-20
+Version: 0.4.4
 Authors@R: c(
   person("Milan", "Wiedemann", email = "milan.wiedemann@gmail.com", role = c("aut", "cre"), comment = c(ORCID = "0000-0003-1991-282X")),
   person("Graham M", "Thew", role = "ctb", comment = c(ORCID = "0000-0003-2851-1315")),
@@ -19,14 +19,15 @@ BugReports: https://github.com/milanwiedemann/suddengains/issues
 LazyData: true
 Imports: dplyr (>= 0.8.0), tibble (>= 2.1.1), magrittr (>= 1.5), rlang
         (>= 0.3.4), stringr (>= 1.4.0), ggplot2 (>= 3.1.1), psych (>=
-        1.8.12), readr (>= 1.3.1), tidyr (>= 0.8.2), ggrepel (>= 0.8.0)
+        1.8.12), readr (>= 1.3.1), tidyr (>= 0.8.2), ggrepel (>=
+        0.8.0), patchwork (>= 1.0.0), forcats, naniar, scales
 Suggests: haven (>= 2.1.0), writexl (>= 1.1.0), knitr (>= 1.21), DT (>=
         0.5), rmarkdown (>= 1.11), spelling (>= 2.1)
-RoxygenNote: 7.0.2
+RoxygenNote: 7.1.0
 VignetteBuilder: knitr
 Language: en-US
 NeedsCompilation: no
-Packaged: 2020-03-14 19:15:54 UTC; milanwiedemann
+Packaged: 2020-05-22 17:24:58 UTC; milanwiedemann
 Author: Milan Wiedemann [aut, cre] (<https://orcid.org/0000-0003-1991-282X>),
   Graham M Thew [ctb] (<https://orcid.org/0000-0003-2851-1315>),
   Richard Stott [ctb] (<https://orcid.org/0000-0003-2533-5504>),
@@ -35,4 +36,4 @@ Author: Milan Wiedemann [aut, cre] (<https://orcid.org/0000-0003-1991-282X>),
   Wellcome Trust [fnd]
 Maintainer: Milan Wiedemann <milan.wiedemann@gmail.com>
 Repository: CRAN
-Date/Publication: 2020-03-15 01:20:09 UTC
+Date/Publication: 2020-05-22 22:40:03 UTC
diff --git a/MD5 b/MD5
@@ -1,54 +1,58 @@
-a17baa8933be5c0dc1ec3c8038cfef9f *DESCRIPTION
-6c1de4f16864e7b1fb45a17936f7bb0e *NAMESPACE
-a7c511425a89f5e1ba272c6d83e9f186 *NEWS.md
+feb1d09c16484dd579cc4c8ec36f2446 *DESCRIPTION
+45cdd0b6defddae88ed46ec0a303b8ab *NAMESPACE
+112f380ce0b5a0f6f93c0cd26db2d044 *NEWS.md
 aac060c746f7fb39344252e5dc59c0fa *R/check_interval.R
 08eb33cfabe439bf2a96a423785fec6e *R/count_intervals.R
 063e61b4b07a6ee316265d80675abad8 *R/create_byperson.R
-3d3cb0c2166278580214a730e8bc1f01 *R/create_bysg.R
+4028c3932aebf8331ce911947fc160ea *R/create_bysg.R
 41c0a0a109f1fefabfaf6ee77bfa67c8 *R/define_crit1_cutoff.R
-1f332dbde40223129c855ae14fb0f639 *R/describe_sg.R
-7f260109a0f04c9bf8797b1833fd27e1 *R/extract_values.R
-e0618eb1b88fe24d4b0270b6a39309bb *R/identify_sg.R
-d4a74abaf8c7d3e92649c8b9980ef8d6 *R/identify_sl.R
+17e1f85d4327bc6424d014d9866a73fe *R/describe_sg.R
+ba79176a1b3d56398652fc13c1d36a42 *R/extract_values.R
+b1c04f7ec7f8f6994205dc7c172bdb60 *R/identify_sg.R
+8f8771a45cb3d176b6587b2ff916d43c *R/identify_sl.R
 f583f5b5856f7cb5f2c5fbb04f39f8a8 *R/magrittrpipeoperator.R
 434aef93ef042b25f382f74f2fc998d6 *R/plot_sg.R
+083b47b900877136fdf2be77a90012b8 *R/plot_sg_intervals.R
 ebb2ca77c30da10025ed0040b280d9e9 *R/plot_sg_trajectories.R
 7ec62cc314be6630849e584257d3369b *R/rename_sg_vars.R
-ada6b9546d020f97d23f683340a32efd *R/select_cases.R
-37f5111e829a44d26ab17ec9d7ebdf9f *R/sgdata.R
-6c89650b927f998ee378b2570a39bb6c *R/sgdata_bad.R
-b6b85263aded1c4b3aea9dbb585c677f *R/suddengains.R
+2a86f6a260fa4edafb1c8ebce88c094c *R/select_cases.R
+31a8e985540e2a7921dd58bb2e31b7a7 *R/sgdata.R
+81cb43764280baed3f9e74de701e63f4 *R/suddengains.R
 0ab4d8322dd4b7ca36d015518a27cd78 *R/write_byperson.R
 1c3614344316647781545e4d54be0163 *R/write_bysg.R
 617176aef37d2ac6aac9db2c7b031b65 *README.md
-21e285bed5781d1d99e6cc91f19eb481 *build/vignette.rds
+637030606613b15e89f2a6a692886d82 *build/vignette.rds
 1aae8c00122076a7afbf638c5fffe83e *data/sgdata.RData
 abfd6092c9e0e0d6c6f39457778b7215 *data/sgdata_bad.RData
 7f082097ed6754bc2180ae9096b6ec70 *inst/CITATION
-0ce768e2c0cc6e4fcbcb6ab3405c4879 *inst/doc/suddengains-tutorial.R
-b3cb33e055fca09b79b84504da83ec54 *inst/doc/suddengains-tutorial.Rmd
-0f700e623878ea0544c70be820481e80 *inst/doc/suddengains-tutorial.html
+6b88cd47b56e7d497d071295ee812d69 *inst/doc/shinygains.Rmd
+5700856667b8cd9025e502b9997b62f2 *inst/doc/shinygains.html
+8355fa5c97a76e94f814272d11471a17 *inst/doc/suddengains-tutorial.R
+9760c07f72897bce63f76c6b6a7d1c0b *inst/doc/suddengains-tutorial.Rmd
+9aff0fcf32b0d547c252894786b3bfa2 *inst/doc/suddengains-tutorial.html
 658676ed1bb999f61dbbc6e5861b6fea *man/check_interval.Rd
 8ab622c2774dd46d68838ca0feb7182a *man/count_intervals.Rd
 5b7e2b60657464d35a03649b107033a5 *man/create_byperson.Rd
 7858361b03b80cd92577eea107607276 *man/create_bysg.Rd
 69bd468651d60a79d12cfc12e5519687 *man/define_crit1_cutoff.Rd
-b50b79e3e6339a39f9f106f40ca25fd2 *man/describe_sg.Rd
+eb4f231ab7b1b359f8e6976642d100d3 *man/describe_sg.Rd
 fc27a73033eaa958111140db3ab99781 *man/extract_values.Rd
 78245dfb7fa41824ff7f2304bdaec0bb *man/figures/README-unnamed-chunk-4-1.png
 1bd025784ffe0997e1d36acb229ef334 *man/figures/README-unnamed-chunk-5-1.png
 c551d79f8326f0574a87d84f1d32856e *man/identify_sg.Rd
 35c7c344604093a5e1e4becfa9ee880b *man/identify_sl.Rd
 df34053394040e7ed49c0c49fc239a67 *man/pipe.Rd
 c6a2c63380b4ff14e13d2c8fb7cc0855 *man/plot_sg.Rd
+a6e072bc793499cc9038f225e910922f *man/plot_sg_intervals.Rd
 53bb0dd86e9fba9d76c737bd7d5136a7 *man/plot_sg_trajectories.Rd
 8a528de9d7364d691383a5e2285be409 *man/rename_sg_vars.Rd
 042982de74cbde374ab27ed5aec9a5b9 *man/select_cases.Rd
-72819e2b60986eb19dd44ef6f9c6d9f8 *man/sgdata.Rd
-6ed5432b37bd7ddbaafe6d039f97685f *man/sgdata_bad.Rd
+04a76c10a203cf8ec6e211f0cba2ea48 *man/sgdata.Rd
+a99a8749a8b18bcdc6f8f7695546de6c *man/sgdata_bad.Rd
 989d9928f8fd73578cb8586dd9c71840 *man/write_byperson.Rd
 3ad68b79af6841d2fff07144ba1a423c *man/write_bysg.Rd
 0622a97a2aaa3c342f09636052c2d7f5 *tests/spelling.R
 99f6992a246e4719f00e69e8e7196bfe *vignettes/r-references.bib
 b10d7f33763f284d4831a1730fc3f130 *vignettes/references.bib
-b3cb33e055fca09b79b84504da83ec54 *vignettes/suddengains-tutorial.Rmd
+6b88cd47b56e7d497d071295ee812d69 *vignettes/shinygains.Rmd
+9760c07f72897bce63f76c6b6a7d1c0b *vignettes/suddengains-tutorial.Rmd
diff --git a/NAMESPACE b/NAMESPACE
@@ -11,9 +11,11 @@ export(extract_values)
 export(identify_sg)
 export(identify_sl)
 export(plot_sg)
+export(plot_sg_intervals)
 export(plot_sg_trajectories)
 export(select_cases)
 export(write_byperson)
 export(write_bysg)
+import(patchwork)
 importFrom(magrittr,"%>%")
 importFrom(rlang,":=")
diff --git a/NEWS.md b/NEWS.md
@@ -1,7 +1,11 @@
+# suddengains 0.4.4
+- add function `plot_sg_intervals()` to visualise session to session intervals that are analysed for sudden gains 
+- minor changes to integrate updates in tibble package
+
 # suddengains 0.4.3
-- supress messages in `create_bysg()` and create_byperson that come from `extract_values()` function
-- update ggplot2 function with deprecated warning message 
-- clearer detailed output message in `check_interval()` function
+- Suppress message about vector length from `extract_values()` in the functions `create_bysg()` and `create_byperson()`
+- Update ggplot2 functions with "deprecated" warning message 
+- Add clearer detailed output message in `check_interval()` function
 
 # suddengains 0.4.2
 

diff --git a/R/create_bysg.R b/R/create_bysg.R
@@ -73,7 +73,7 @@ create_bysg <- function(data, sg_crit1_cutoff, id_var_name, sg_var_list, tx_star
 
     # Stop if no sudden gains were identified and return error
     if (sg_sum == 0) {
-      stop("No sudden gains were identified.", call. = FALSE)
+      warning("No sudden gains were identified.", call. = FALSE)
     }
 
   } else if (identify == "sl") {
@@ -97,7 +97,7 @@ create_bysg <- function(data, sg_crit1_cutoff, id_var_name, sg_var_list, tx_star
 
     # Stop if no sudden losses were identified and return error
     if (sg_sum == 0) {
-      stop("No sudden losses were identified.", call. = FALSE)
+      warning("No sudden losses were identified.", call. = FALSE)
     }
   }
 
@@ -237,6 +237,6 @@ create_bysg <- function(data, sg_crit1_cutoff, id_var_name, sg_var_list, tx_star
 
   # Return tibble
   data_bysg %>%
-    tibble::as.tibble() %>%
+    tibble::as_tibble() %>%
     dplyr::arrange(!! rlang::sym(id_var_name))
 }
diff --git a/R/describe_sg.R b/R/describe_sg.R
@@ -1,6 +1,6 @@
 #' Show descriptives for the sudden gains datasets
 #'
-#' Some numbers (percentages) will be different depending which form of data set (bysg vs. byperson) is selected, because if multiple gains in bysg only one will be selected for further analyses.
+#' Descriptives might differ between the bysg and byperson data sets depending on whether multiple gains are present.
 #' @param data A \code{bysg} or \code{byperson} dataset created using the function \code{\link{create_bysg}} or \code{\link{create_byperson}}.
 #' @param sg_data_structure String, indicating whether the input data is a \code{bysg} or \code{byperson} dataset.
 #' @return A list, showing basic descriptive statistics for sudden gains within the dataset specified.
@@ -34,23 +34,6 @@
 #' # Describe bysg dataset
 #' describe_sg(data = bysg,
 #'             sg_data_structure = "bysg")
-#'
-#' # Create byperson dataset
-#' byperson_first <- create_byperson(data = sgdata,
-#'                                   sg_crit1_cutoff = 7,
-#'                                   id_var_name = "id",
-#'                                   tx_start_var_name = "bdi_s1",
-#'                                   tx_end_var_name = "bdi_s12",
-#'                                   sg_var_list = c("bdi_s1", "bdi_s2", "bdi_s3",
-#'                                                   "bdi_s4", "bdi_s5", "bdi_s6",
-#'                                                   "bdi_s7", "bdi_s8", "bdi_s9",
-#'                                                   "bdi_s10", "bdi_s11", "bdi_s12"),
-#'                                   sg_measure_name = "bdi",
-#'                                   multiple_sg_select = "first")
-#'
-#' # Describe byperson dataset
-#' describe_sg(data = byperson_first,
-#'             sg_data_structure = "byperson")
 
 describe_sg <- function(data, sg_data_structure = c("bysg", "byperson")) {
 

diff --git a/R/extract_values.R b/R/extract_values.R
@@ -53,11 +53,12 @@ extract_values <- function(data, id_var_name, extract_var_list, sg_session_n_var
 
   if (is.list(extract_var_list) == TRUE) {
 
+    # If a list is specified, make sure that each element in the list has the same length
     if ((length(unique(lengths(extract_var_list))) == 1L) == FALSE) {
-      stop("Elements entered in list 'extract_var_list' must have the same length.", call. = FALSE)
+      stop("Each element entered in the list 'extract_var_list' must have the same length.", call. = FALSE)
     }
 
-    message("Note: Each element specified in 'extract_var_list' must have the same number of repeated time points as the measure used to identify sudden gains.")
+    message("Note: The measures specified in 'extract_var_list' must all have the same number of repeated time points as the measure used to identify sudden gains.")
 
     # create data with ids for loop to add
     data_loop <- dplyr::select(data, id_var_name)
@@ -112,7 +113,7 @@ extract_values <- function(data, id_var_name, extract_var_list, sg_session_n_var
 
   } else if (is.vector(extract_var_list) == TRUE) {
 
-    message("Note: The vector(s) specified in 'extract_var_list' must have the same number of repeated time points as the measure used to identify sudden gains.")
+    message("Note: The measure specified in 'extract_var_list' must have the same number of repeated time points as the measure used to identify sudden gains.")
 
     # create data with ids for loop to add
     data_loop <- dplyr::select(data, id_var_name)

diff --git a/R/identify_sg.R b/R/identify_sg.R
@@ -240,7 +240,7 @@ identify_sg <- function(data, id_var_name, sg_var_list, sg_crit1_cutoff, sg_crit
             # Return dataframe with details about each criteria instead of combined crit123
             data_crit123_details %>%
                 dplyr::arrange(!! rlang::sym(id_var_name)) %>%
-                tibble::as.tibble()
+                tibble::as_tibble()
 
         } else if (crit123_details == FALSE) {
 
@@ -251,6 +251,6 @@ identify_sg <- function(data, id_var_name, sg_var_list, sg_crit1_cutoff, sg_crit
             data_select %>%
                 dplyr::left_join(data_crit123, by = id_var_name) %>%
                 dplyr::arrange(!! rlang::sym(id_var_name)) %>%
-                tibble::as.tibble()
+                tibble::as_tibble()
         }
 }
diff --git a/R/identify_sl.R b/R/identify_sl.R
@@ -243,7 +243,7 @@ identify_sl <- function(data, id_var_name, sg_var_list, sg_crit1_cutoff, sg_crit
             # Return dataframe with details about each criteria instead of combined crit123
             data_crit123_details %>%
                 dplyr::arrange(!! rlang::sym(id_var_name)) %>%
-                tibble::as.tibble()
+                tibble::as_tibble()
 
         } else if (crit123_details == FALSE) {
 
@@ -254,6 +254,6 @@ identify_sl <- function(data, id_var_name, sg_var_list, sg_crit1_cutoff, sg_crit
             data_select %>%
                 dplyr::left_join(data_crit123, by = id_var_name) %>%
                 dplyr::arrange(!! rlang::sym(id_var_name)) %>%
-                tibble::as.tibble()
+                tibble::as_tibble()
         }
 }
diff --git a/R/plot_sg_intervals.R b/R/plot_sg_intervals.R
@@ -0,0 +1,104 @@
+#' Plot summary of available data per time point and analysed session to session intervals
+#'
+#' @param data A data set in wide format including an ID variable and variables for each measurement point.
+#' @param id_var_name String, specifying the name of the ID variable. Each row should have a unique value.
+#' @param sg_var_list Vector, specifying the variable names of each measurement point sequentially.
+#' @param identify_sg_1to2 Logical, indicating whether to identify sudden losses from measurement point 1 to 2.
+#' If set to TRUE, this implies that the first variable specified in \code{sg_var_list} represents a baseline measurement point, e.g. pre-intervention assessment.
+#' @return Plot showing percentage of available data per time point and percentage of session to session intervals that wer analysed for sudden gains.
+#' @export
+#' @import patchwork
+#' @examples # Create plot
+#' plot_sg_intervals(data = sgdata,
+#'                   id_var_name = "id",
+#'                   sg_var_list = c("bdi_s1", "bdi_s2", "bdi_s3",
+#'                                   "bdi_s4", "bdi_s5", "bdi_s6",
+#'                                   "bdi_s7", "bdi_s8", "bdi_s9",
+#'                                   "bdi_s10", "bdi_s11", "bdi_s12"))
+#'
+plot_sg_intervals <- function(data, id_var_name, sg_var_list, identify_sg_1to2 = FALSE) {
+
+    # Select data
+    sgdata_miss <- data %>%
+        dplyr::select(id_var_name, sg_var_list)
+
+    # Calculate data for first plot
+    data_missing <- naniar::miss_var_summary(sgdata_miss) %>%
+        dplyr::mutate(pct_miss = pct_miss / 100,
+               pct_available = 1 - pct_miss,
+               variable = forcats::fct_rev(factor(variable, levels = sg_var_list))) %>%
+        tidyr::pivot_longer(cols = c("pct_miss", "pct_available")) %>%
+        dplyr::mutate(name = factor(name, levels = c("pct_miss", "pct_available"), labels = c("Missing Data", "Available Data")))
+
+    # Create first plot
+    plot_sg_miss_pct <- data_missing %>%
+        dplyr::filter(variable != "id") %>%
+        ggplot2::ggplot(ggplot2::aes(x = variable, y = value, fill = name)) +
+        ggplot2::geom_bar(position = "fill",stat = "identity") +
+        # or:
+        # geom_bar(position = position_fill(), stat = "identity")
+        ggplot2::scale_y_continuous(labels = scales::percent_format()) +
+        # scale_fill_manual(values = c("red", "yellow"))
+        ggplot2::scale_fill_viridis_d(name = "", direction = -1, alpha = .8) +
+        # papaja::theme_apa() +
+        ggplot2::labs(title = "Percentage of available data per time point", x = "", y = "") +
+        ggplot2::theme(legend.position = "right")+
+        ggplot2::theme(legend.key = ggplot2::element_rect(color = NA, fill = NA),
+              legend.key.size = ggplot2::unit(0.9, "cm")) +
+        ggplot2::theme(legend.title.align = 1.5) +
+        ggplot2::coord_flip()
+
+
+    # Now create date for second plot
+    count_intervals_data <- suddengains::count_intervals(data = data,
+                                                         id_var_name = id_var_name,
+                                                         sg_var_list = sg_var_list)
+
+    plot_count_intervals_data_temp <- tibble::tibble(total = count_intervals_data[[1]],
+                                             total_not_available_sg = count_intervals_data[[1]] - count_intervals_data[[2]],
+                                             total_between_sess_sg = count_intervals_data[[2]],
+                                             available_between_sess_sg = count_intervals_data[[3]],
+                                             not_available_between_sess_sg = count_intervals_data[[4]]) %>%
+        dplyr::select(total_not_available_sg, available_between_sess_sg, not_available_between_sess_sg) %>%
+        tidyr::pivot_longer(cols = 1:3) %>%
+        dplyr::mutate(pct = value / count_intervals_data[[1]])
+
+
+    # More data stuff here get things ready for labels legend etc, adding numbers
+    plot_count_intervals_data <- plot_count_intervals_data_temp %>%
+        dplyr::mutate(id = "count_intervals",
+               name = factor(name,
+                             levels = c("not_available_between_sess_sg", "total_not_available_sg", "available_between_sess_sg"),
+                             labels = c(paste0("Not Analysed Type 1\n(n = ", plot_count_intervals_data_temp$value[3],", ", round(plot_count_intervals_data_temp$pct[3] * 100, 0), "%)"),
+                                        paste0("Not Analysed Type 2\n(n = ", plot_count_intervals_data_temp$value[1] ,", ", round(plot_count_intervals_data_temp$pct[1] * 100, 0), "%)"),
+                                        paste0("Analysed\n(n = ", plot_count_intervals_data_temp$value[2] ,", ", round(plot_count_intervals_data_temp$pct[2] * 100, 0), "%)"))))
+
+    # Create second plot
+    plot_sg_intervals <- plot_count_intervals_data %>%
+        ggplot2::ggplot(ggplot2::aes(x = id, y = value, fill = name)) +
+        ggplot2::geom_bar(position = "fill", stat = "identity") +
+        ggplot2::scale_fill_viridis_d(name = "", direction = -1, alpha = .8) +
+        ggplot2::labs(title = "Percentage of session to session intervals analysed",
+                      subtitle = paste0("Total number of intervals, n = ", sum(plot_count_intervals_data_temp$value))) +
+        ggplot2::theme(legend.position = "right") +
+        ggplot2::labs(x = "", y = "", caption = "Not Analysed Type 1: The total number of session to session intervals that can not be analysed for sudden gains due to the pattern of missing data.\n\nNot Analysed Type 2: The total number of session to session intervals from the first to the second and the second last to the last session.") +
+        ggplot2::theme(legend.key = ggplot2::element_rect(color = NA, fill = NA),
+              legend.key.size = ggplot2::unit(0.9, "cm")) +
+        ggplot2::theme(legend.title.align = 1.5) +
+        ggplot2::scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
+        ggplot2::theme(axis.text.y = ggplot2::element_blank(),
+              axis.ticks.y = ggplot2::element_blank(),
+              plot.caption = ggplot2::element_text(hjust = 0)) +
+        ggplot2::coord_flip()
+
+
+    # Combine Plots
+
+    plot_return <-  patchwork::wrap_plots(plot_sg_miss_pct, plot_sg_intervals) +
+        patchwork::plot_layout(heights = c(4, 1)) + patchwork::plot_annotation(tag_levels = 'A')
+
+    # Return plot
+    return(plot_return)
+
+}
+
diff --git a/R/select_cases.R b/R/select_cases.R
@@ -85,7 +85,7 @@ select_cases <- function(data, id_var_name, sg_var_list, method = c("pattern", "
       data_pattern <- base::cbind(id_list, data_pattern)
 
       data_out <- data_pattern %>%
-          tibble::as.tibble() %>%
+          tibble::as_tibble() %>%
           tidyr::unite("pattern", sg_var_list, sep = " ") %>%
           dplyr::mutate(sg_pattern_1 = stringr::str_detect(pattern, "TRUE TRUE TRUE TRUE"),
                sg_pattern_2 = stringr::str_detect(pattern, "TRUE TRUE TRUE FALSE TRUE"),