diff --git a/NAMESPACE b/NAMESPACE index 47c23faf..136a7d49 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,10 +4,10 @@ export("%>%") export(bwplot) export(densityplot) export(ggmice) -export(plot_chains) export(plot_corr) export(plot_pattern) export(plot_pred) +export(plot_trace) export(stripplot) export(theme_mice) export(xyplot) diff --git a/R/corr.R b/R/corr.R new file mode 100644 index 00000000..c2344545 --- /dev/null +++ b/R/corr.R @@ -0,0 +1,56 @@ +#' Plot correlations between (incomplete) variables +#' +#' @param dat A dataset of class `data.frame`, `tibble`, or `matrix`. +#' @param vrb String or vector with variable name(s), default is "all". +#' @param label Logical indicating whether correlation values should be displayed. +#' @param square Logical indicating whether the plot tiles should be squares. +#' @param diagonal Logical indicating whether the correlation of each variable with itself should be displayed. +#' @param rotate Logical indicating whether the variable name labels should be rotated 90 degrees. +#' +#' @return An object of class `ggplot`. 
+#'
+#' @examples
+#' plot_corr(mice::nhanes, label = TRUE)
+#' @export
+plot_corr <- function(dat, vrb = "all", label = FALSE, square = TRUE, diagonal = FALSE, rotate = FALSE) {
+  if (!is.data.frame(dat) && !is.matrix(dat)) { # scalar condition, so use short-circuit &&
+    stop("Dataset should be a 'data.frame' or 'matrix'.")
+  }
+  if (vrb[1] == "all") {
+    vrb <- colnames(dat) # names() is NULL for a matrix; colnames() works for both accepted input types
+  }
+  p <- length(vrb)
+  corrs <- data.frame( # long format: one row per (variable to impute, predictor) pair
+    vrb = rep(vrb, each = p),
+    prd = vrb,
+    corr = matrix(round(stats::cov2cor(stats::cov(data.matrix(dat[, vrb, drop = FALSE]), use = "pairwise.complete.obs")), 2), nrow = p * p, byrow = TRUE)
+  )
+  if (!diagonal) {
+    corrs[corrs$vrb == corrs$prd, "corr"] <- NA # hide each variable's correlation with itself
+  }
+  gg <- ggplot2::ggplot(corrs, ggplot2::aes(x = .data$prd, y = .data$vrb, label = .data$corr, fill = .data$corr)) +
+    ggplot2::geom_tile(color = "black") +
+    ggplot2::scale_x_discrete(limits = vrb, position = "top") +
+    ggplot2::scale_y_discrete(limits = rev(vrb)) +
+    ggplot2::scale_fill_gradient2(low = "deepskyblue", mid = "lightyellow", high = "orangered", na.value = "white", limits = c(-1, 1)) +
+    ggplot2::labs(
+      x = "Imputation model predictor",
+      y = "Variable to impute",
+      fill = "Correlation*",
+      caption = "*pairwise complete observations"
+    ) +
+    theme_minimice()
+  if (label) {
+    gg <- gg + ggplot2::geom_text(color = "black", show.legend = FALSE) # print the rounded value in each tile
+  }
+  if (square) {
+    gg <- gg + ggplot2::coord_fixed()
+  }
+  if (rotate) {
+    gg <- gg + ggplot2::theme(axis.text.x.top = ggplot2::element_text(angle = 90))
+  }
+  return(gg)
+}
+
+# TODO: add plot for missingness indicators predictors
+# TODO: maybe add model.matrix argument to correlation plot?
diff --git a/R/flux.R b/R/flux.R new file mode 100644 index 00000000..24ec63e9 --- /dev/null +++ b/R/flux.R @@ -0,0 +1,18 @@ +# plot_flux <- function(dat) { +# # escape function if dataset is complete +# # if(!any(is.na(dat))){return(plot_a_mouse())} +# # plot in and outflux +# flx <- mice::flux(dat) %>% cbind(variable = rownames(.)) +# gg <- flx %>% +# ggplot2::ggplot(ggplot2::aes(x = influx, +# y = outflux, +# label = variable)) + +# ggplot2::geom_abline(intercept = 1, +# slope = -1, +# linetype = "dashed") + +# ggplot2::geom_text(position = ggplot2::position_jitter(width = 0.01, height = 0.01)) + +# ggplot2::lims(x = c(-0.01, 1.01), y = c(-0.01, 1.01)) + +# ggplot2::theme_classic() +# # output +# return(gg) +# } diff --git a/R/ggmice.R b/R/ggmice.R index 194c85cc..497635bb 100644 --- a/R/ggmice.R +++ b/R/ggmice.R @@ -21,8 +21,8 @@ ggmice <- function(data = NULL, mapping = ggplot2::aes()) { if (is.character(mapping$x) | is.character(mapping$y)) { stop("The mapping argument requires variable name(s) of type 'quosure', typically created with ggplot2::aes(). To supply a string instead, try using ggplot2::aes_string()") } - if (any(c("colour", "fill") %in% mapping_args)) { - warning("The aes() arguments 'colour', 'fill' and 'group' have a special use in ggmmice() and will be overwritten. Try using 'shape' or 'linetype' for additional mapping, or use faceting.") + if ("colour" %in% mapping_args) { + warning("The aes() argument 'colour' has a special use in ggmmice() and will be overwritten. 
Try using 'shape' or 'linetype' for additional mapping, or use faceting.") } # extract variable names from mapping object if (mice::is.mids(data)) { @@ -48,7 +48,7 @@ ggmice <- function(data = NULL, mapping = ggplot2::aes()) { ), .imp = factor(.imp, ordered = TRUE) ) - mice_mapping <- utils::modifyList(mapping, ggplot2::aes(colour = .where, fill = .where)) + mice_mapping <- utils::modifyList(mapping, ggplot2::aes(colour = .where)) #, fill = .where mice_colors <- c("observed" = "#006CC2B3", "imputed" = "#B61A51B3") } else { where_xy <- rowSums(is.na(as.matrix(data[, c(vrb_x, vrb_y)]))) > 0L @@ -60,13 +60,13 @@ ggmice <- function(data = NULL, mapping = ggplot2::aes()) { }), .where = factor(where_xy, levels = c(FALSE, TRUE), labels = c("observed", "missing"), ordered = TRUE) ) - mice_mapping <- utils::modifyList(mapping, ggplot2::aes(colour = .where, fill = .where)) + mice_mapping <- utils::modifyList(mapping, ggplot2::aes(colour = .where)) #, fill = .where mice_colors <- c("observed" = "#006CC2B3", "missing" = "#B61A51B3") } # create plot gg <- ggplot2::ggplot(data = mice_data, mapping = mice_mapping) + ggplot2::scale_color_manual(values = mice_colors, drop = TRUE, name = "") + - ggplot2::scale_fill_manual(values = mice_colors, drop = TRUE, name = "") + + #ggplot2::scale_fill_manual(values = mice_colors, drop = TRUE, name = "") + theme_mice() if (!mice::is.mids(data)) { gg <- gg + diff --git a/R/pattern.R b/R/pattern.R index c041157b..62be430f 100644 --- a/R/pattern.R +++ b/R/pattern.R @@ -67,3 +67,5 @@ plot_pattern <- function(dat, square = FALSE, rotate = FALSE) { return(gg) } + + diff --git a/R/pred.R b/R/pred.R index 69abfd2b..bf01b2f5 100644 --- a/R/pred.R +++ b/R/pred.R @@ -47,61 +47,4 @@ plot_pred <- function(pred, label = FALSE, square = TRUE, rotate = FALSE) { return(gg) } -#' Plot correlations between (incomplete) variables -#' -#' @param dat A dataset of class `data.frame`, `tibble`, or `matrix`. 
-#' @param vrb String or vector with variable name(s), default is "all". -#' @param label Logical indicating whether correlation values should be displayed. -#' @param square Logical indicating whether the plot tiles should be squares. -#' @param diagonal Logical indicating whether the correlation of each variable with itself should be displayed. -#' @param rotate Logical indicating whether the variable name labels should be rotated 90 degrees. -#' -#' @return An object of class `ggplot`. -#' -#' @examples -#' plot_corr(mice::nhanes, label = TRUE) -#' @export -plot_corr <- function(dat, vrb = "all", label = FALSE, square = TRUE, diagonal = FALSE, rotate = FALSE) { - if (!is.data.frame(dat) & !is.matrix(dat)) { - stop("Dataset should be a 'data.frame' or 'matrix'.") - } - if (vrb[1] == "all") { - vrb <- names(dat) - } - p <- length(vrb) - corrs <- data.frame( - vrb = rep(vrb, each = p), - prd = vrb, - corr = matrix(round(stats::cov2cor(stats::cov(data.matrix(dat[, vrb]), use = "pairwise.complete.obs")), 2), nrow = p * p, byrow = TRUE) - ) - if (!diagonal) { - corrs[corrs$vrb == corrs$prd, "corr"] <- NA - } - gg <- ggplot2::ggplot(corrs, ggplot2::aes(x = .data$prd, y = .data$vrb, label = .data$corr, fill = .data$corr)) + - ggplot2::geom_tile(color = "black") + - ggplot2::scale_x_discrete(limits = vrb, position = "top") + - ggplot2::scale_y_discrete(limits = rev(vrb)) + - ggplot2::scale_fill_gradient2(low = "deepskyblue", mid = "lightyellow", high = "orangered", na.value = "white", limits = c(-1, 1)) + - ggplot2::labs( - x = "Imputation model predictor", - y = "Variable to impute", - fill = "Correlation*", - caption = "*pairwise complete observations" - ) + - theme_minimice() - if (label) { - gg <- gg + ggplot2::geom_text(color = "black", show.legend = FALSE) - } - if (square) { - gg <- gg + ggplot2::coord_fixed() - } - if (rotate) { - gg <- gg + ggplot2::theme(axis.text.x.top = ggplot2::element_text(angle = 90)) - } - return(gg) -} - # TODO: add imputation method to 
pred plot -# TODO: add plot for missingness indicators predictors -# TODO: maybe add model.matrix argument to correlation plot? -# TODO: add argument to rotate/shorten variable names diff --git a/R/theme.R b/R/theme.R index 5a8237ce..71b2bc1b 100644 --- a/R/theme.R +++ b/R/theme.R @@ -27,3 +27,5 @@ theme_minimice <- function() { panel.grid.minor = ggplot2::element_blank() ) } + +# TODO: make facets in plot_trace() look more pretty diff --git a/R/chains.R b/R/trace.R similarity index 89% rename from R/chains.R rename to R/trace.R index 498d40bb..91bcf7a2 100644 --- a/R/chains.R +++ b/R/trace.R @@ -1,5 +1,5 @@ # plot convergence -#' Convergence plot for Multiply Imputed Data Sets +#' Plot the trace lines of the MICE algorithm for convergence evaluation #' #' @param imp An object of class `mids`. #' @param vrb String or vector with variable name(s), default is "all". @@ -8,9 +8,9 @@ #' #' @examples #' imp <- mice::mice(mice::nhanes, print = FALSE) -#' plot_chains(imp) +#' plot_trace(imp) #' @export -plot_chains <- function(imp, vrb = "all") { +plot_trace <- function(imp, vrb = "all") { if (!mice::is.mids(imp)) { stop("argument 'imp' must be a 'mids' object", call. 
= FALSE) } @@ -56,3 +56,5 @@ plot_chains <- function(imp, vrb = "all") { ) + theme_mice() } + +# TODO: make iterations and statistic arguments as well diff --git a/man/plot_corr.Rd b/man/plot_corr.Rd index 206f2444..6137ed23 100644 --- a/man/plot_corr.Rd +++ b/man/plot_corr.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pred.R +% Please edit documentation in R/corr.R \name{plot_corr} \alias{plot_corr} \title{Plot correlations between (incomplete) variables} diff --git a/man/plot_chains.Rd b/man/plot_trace.Rd similarity index 52% rename from man/plot_chains.Rd rename to man/plot_trace.Rd index 82974443..d6500743 100644 --- a/man/plot_chains.Rd +++ b/man/plot_trace.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/chains.R -\name{plot_chains} -\alias{plot_chains} -\title{Convergence plot for Multiply Imputed Data Sets} +% Please edit documentation in R/trace.R +\name{plot_trace} +\alias{plot_trace} +\title{Plot the trace lines of the MICE algorithm for convergence evaluation} \usage{ -plot_chains(imp, vrb = "all") +plot_trace(imp, vrb = "all") } \arguments{ \item{imp}{An object of class \code{mids}.} @@ -15,9 +15,9 @@ plot_chains(imp, vrb = "all") An object of class \code{ggplot}. 
} \description{ -Convergence plot for Multiply Imputed Data Sets +Plot the trace lines of the MICE algorithm for convergence evaluation } \examples{ imp <- mice::mice(mice::nhanes, print = FALSE) -plot_chains(imp) +plot_trace(imp) } diff --git a/tests/testthat/test-mids.R b/tests/testthat/test-mids.R deleted file mode 100644 index 0d3c0fa4..00000000 --- a/tests/testthat/test-mids.R +++ /dev/null @@ -1,5 +0,0 @@ -test_that("plot_chains creates ggplot object", { - imp <- mice::mice(data.frame(a = 1:4, b = c(NA, 2, 2, 1)), printFlag = FALSE) - gg <- plot_chains(imp) - expect_s3_class(gg, "ggplot") -}) diff --git a/vignettes/ggmice.Rmd b/vignettes/ggmice.Rmd index dff5be61..cd639052 100644 --- a/vignettes/ggmice.Rmd +++ b/vignettes/ggmice.Rmd @@ -88,7 +88,7 @@ The `ggmice` package contains functions to evaluate observed and imputed data. ## Algorithmic convergence ```{r convergence} -plot_chains(imp, "bmi") +plot_trace(imp, "bmi") ``` ## Box and whiskers plot diff --git a/vignettes/old_friends.Rmd b/vignettes/old_friends.Rmd new file mode 100644 index 00000000..10e27037 --- /dev/null +++ b/vignettes/old_friends.Rmd @@ -0,0 +1,113 @@ +--- +title: "old_friends" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{old_friends} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +```{r setup} +library(ggmice) +imp <- mice::mice(mice::boys, method = "pmm", printFlag = FALSE) +``` + +How to re-create the plotting functions from `mice` with `ggmice`, in alphabetical order. + +# `mice::bwplot()` + +Box-and-whisker plot of observed and imputed data. 
+ +```{r bwplot} +# original plot +mice::bwplot(imp, bmi ~ .imp) +# ggmice equivalent +ggmice(imp, ggplot2::aes(x = .imp, y = bmi)) + + ggplot2::geom_boxplot() + + ggplot2::labs(x = "Imputation number") +# extended reproduction with ggmice +ggmice(imp, ggplot2::aes(x = .imp, y = bmi)) + + ggplot2::stat_boxplot(geom = 'errorbar', linetype = "dashed") + + ggplot2::geom_boxplot(outlier.colour = "grey", outlier.shape = 1) + + ggplot2::labs(x = "Imputation number") +``` + +# `mice::densityplot()` + +Density plot of observed and imputed data. + +```{r densityplot} +# original plot +mice::densityplot(imp, ~bmi) +# ggmice equivalent +ggmice(imp, ggplot2::aes(x = bmi, group = .imp)) + + ggplot2::geom_density() +# extended reproduction with ggmice +ggmice(imp, ggplot2::aes(x = bmi, group = .imp, size = .where)) + + ggplot2::geom_density() + + ggplot2::scale_size_manual(values = c("observed" = 1, "imputed" = 0.5), guide = "none") +``` + +# `mice::flux()` + +[To be added] + +# `mice::md.pattern()` + +Missing data pattern plot. + +```{r md.pattern} +# original plot +mice::md.pattern(imp$data) +# ggmice equivalent +plot_pattern(imp$data) +``` + +# `mice::plot.mids()` + +Plot the trace lines of the MICE algorithm. + +```{r plot.mids} +# original plot +plot(imp, bmi ~ .it | .ms) +# ggmice equivalent +plot_trace(imp, "bmi") +``` + +# `mice::stripplot()` + +Stripplot of observed and imputed data. + +```{r stripplot} +# original plot +mice::stripplot(imp, bmi ~ .imp) +# ggmice equivalent +ggmice(imp, ggplot2::aes(x = .imp, y = bmi)) + + ggplot2::geom_jitter(width = 0.25) + + ggplot2::labs(x = "Imputation number") +# extended reproduction with ggmice + +``` + +# `mice::xyplot()` + +Scatterplot of observed and imputed data. + +```{r} +# original plot +mice::xyplot(imp, bmi ~ age) +# ggmice equivalent +ggmice(imp, ggplot2::aes(age, bmi)) + + ggplot2::geom_point() +# extended reproduction with ggmice +ggmice(imp, ggplot2::aes(age, bmi)) + + ggplot2::geom_point(shape = 1) +``` +