Skip to content

Commit

Permalink
Add first draft version of the vignette (issue amices#9)
Browse files Browse the repository at this point in the history
  • Loading branch information
hanneoberman committed Mar 3, 2022
1 parent 0b047e6 commit edb9a18
Show file tree
Hide file tree
Showing 11 changed files with 176 additions and 17 deletions.
2 changes: 2 additions & 0 deletions .Rbuildignore
Expand Up @@ -3,3 +3,5 @@
^LICENSE\.md$
^README\.Rmd$
^\.github$
^doc$
^Meta$
3 changes: 3 additions & 0 deletions .gitignore
Expand Up @@ -3,3 +3,6 @@
.Rdata
.httr-oauth
.DS_Store
inst/doc
/doc/
/Meta/
5 changes: 4 additions & 1 deletion DESCRIPTION
Expand Up @@ -29,7 +29,10 @@ Imports:
utils
Suggests:
testthat (>= 3.0.0),
plotly
plotly,
knitr,
rmarkdown
URL: https://github.com/amices/ggmice, https://amices.org/
BugReports: https://github.com/amices/ggmice
Config/testthat/edition: 3
VignetteBuilder: knitr
8 changes: 6 additions & 2 deletions R/bwplot.R
Expand Up @@ -22,5 +22,9 @@ bwplot <- function(imp, vrb = "all") {
ggplot2::scale_x_discrete(drop = FALSE) +
ggplot2::labs(x = "Imputation number\n(0 = original data)")
}) %>% stats::setNames(vrb)
return(gg)
}
if (length(vrb) == 1) {
return(gg[[1]])
} else {
return(gg)
}
}
11 changes: 8 additions & 3 deletions R/densityplot.R
Expand Up @@ -17,8 +17,13 @@ densityplot <- function(imp, vrb = "all") {
vrb <- names(imp$data)
}
gg <- purrr::map(vrb, ~ {
ggmice(imp, ggplot2::aes_string(x = .x, group = ".imp")) +
ggplot2::geom_density(fill = NA)
ggmice(imp, ggplot2::aes_string(x = .x, group = ".imp", size = ".where")) +
ggplot2::geom_density(fill = NA) +
ggplot2::scale_size_manual(values = c("observed" = 1, "imputed" = 0.5), guide = "none")
}) %>% stats::setNames(vrb)
return(gg)
if (length(vrb) == 1) {
return(gg[[1]])
} else {
return(gg)
}
}
19 changes: 12 additions & 7 deletions R/pred.R
Expand Up @@ -28,7 +28,7 @@ plot_pred <- function(pred, label = FALSE, square = TRUE) {
ggplot2::scale_y_discrete(limits = rev(vrbs)) +
ggplot2::scale_fill_manual(values = c("yes" = "#006CC2B3", "no" = "white")) +
ggplot2::labs(
x = "Predictor in imputation model",
x = "Imputation model predictor",
y = "Variable to impute",
fill = "Predictor used",
color = ""
Expand All @@ -49,17 +49,18 @@ plot_pred <- function(pred, label = FALSE, square = TRUE) {
#' @param vrb String or vector with variable name(s), default is "all".
#' @param label Logical indicating whether correlation values should be displayed.
#' @param square Logical indicating whether the plot tiles should be squares (setting the plot height equal to the plot width).
#' @param diagonal Logical indicating whether the correlation of each variable with itself should be displayed.
#'
#' @return An object of class `ggplot`
#'
#' @examples
#' plot_corr(mice::nhanes, label = TRUE)
#' @export
plot_corr <- function(dat, vrb = "all", label = FALSE, square = TRUE) {
plot_corr <- function(dat, vrb = "all", label = FALSE, square = TRUE, diagonal = FALSE) {
if (!is.data.frame(dat) & !is.matrix(dat)) {
stop("Dataset should be a 'data.frame' or 'matrix'.")
}
if (vrb == "all") {
if (vrb[1] == "all") {
vrb <- names(dat)
}
p <- length(vrb)
Expand All @@ -68,14 +69,17 @@ plot_corr <- function(dat, vrb = "all", label = FALSE, square = TRUE) {
prd = vrb,
corr = matrix(round(stats::cov2cor(stats::cov(data.matrix(dat[, vrb]), use = "pairwise.complete.obs")), 2), nrow = p * p, byrow = TRUE)
)
if (!diagonal) {
corrs[corrs$vrb == corrs$prd, "corr"] <- NA
}
gg <- ggplot2::ggplot(corrs, ggplot2::aes(x = .data$prd, y = .data$vrb, label = .data$corr, fill = .data$corr)) +
ggplot2::geom_tile() +
ggplot2::geom_tile(color = "black") +
ggplot2::scale_x_discrete(limits = vrb, position = "top") +
ggplot2::scale_y_discrete(limits = rev(vrb)) +
ggplot2::scale_fill_gradient2(low = "deepskyblue", mid = "lightyellow", high = "orangered", limits = c(-1, 1)) +
ggplot2::scale_fill_gradient2(low = "deepskyblue", mid = "lightyellow", high = "orangered", na.value = "white", limits = c(-1, 1)) +
ggplot2::labs(
x = "",
y = "",
x = "Imputation model predictor",
y = "Variable to impute",
fill = "Correlation*\n",
caption = "*paiwise complete observations"
) +
Expand All @@ -91,3 +95,4 @@ plot_corr <- function(dat, vrb = "all", label = FALSE, square = TRUE) {

# TODO: add plot for missingness indicators predictors
# TODO: maybe add model.matrix argument to correlation plot?
# TODO: add argument to rotate/shorten variable names
8 changes: 6 additions & 2 deletions R/stripplot.R
Expand Up @@ -22,8 +22,12 @@ stripplot <- function(imp, vrb = "all") {
ggplot2::scale_x_discrete(drop = FALSE) +
ggplot2::labs(x = "Imputation number\n(0 = original data)")
}) %>% stats::setNames(vrb)
return(gg)
}
if (length(vrb) == 1) {
return(gg[[1]])
} else {
return(gg)
}
}

# TODO: add vignette with stripplot() + geom_boxplot(alpha = 0.5, outlier.shape = NA)
# TODO: add vertical jitter or warning for categorical variables
4 changes: 3 additions & 1 deletion man/plot_corr.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion tests/testthat/test-miceplots.R
@@ -1,6 +1,8 @@
test_that("bwplot creates ggplot object", {
imp <- mice::mice(data.frame(a = 1:4, b = c(NA, 2, 2, 1)), printFlag = FALSE)
gg <- bwplot(imp, "b")
gg <- bwplot(imp)
expect_type(gg, "list")
expect_s3_class(gg[[1]], "ggplot")
gg <- bwplot(imp, "b")
expect_s3_class(gg, "ggplot")
})
2 changes: 2 additions & 0 deletions vignettes/.gitignore
@@ -0,0 +1,2 @@
*.html
*.R
127 changes: 127 additions & 0 deletions vignettes/ggmice.Rmd
@@ -0,0 +1,127 @@
---
title: "Visualize incomplete and imputed data with `ggmice`"
output: rmarkdown::html_vignette
vignette: >
%\VignetteIndexEntry{ggmice}
%\VignetteEngine{knitr::rmarkdown}
%\VignetteEncoding{UTF-8}
%\VignetteDepends{mice}
%\VignetteDepends{ggplot2}
---

```{r, include = FALSE}
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>",
fig.width = 7.2,
fig.height = 4
)
```

# Overview

The package `ggmice` bundles a set of plotting functions for the imputation package `mice`. These plotting functions adhere to the 'grammar of graphics' style, popularized by the `ggplot2` package. With that, `ggmice` enhances imputation workflows and provides plotting objects that are easy to extend and manipulate.

The main `mice` package contains several `lattice`-style plotting functions; `ggmice` provides `ggplot2` versions of these functions and more. The core function in the `ggmice` package, `ggmice()`, can be applied to obtain `ggplot2` objects for both incomplete *and* imputed data. This vignette provides an overview of the main functions included in `ggmice`.

# Set-up

The `ggmice` package can be installed from GitHub as follows:

```{r install, echo=TRUE, eval=FALSE}
install.packages("devtools")
devtools::install_github("amices/ggmice")
```

It is recommended to load the imputation package `mice` and the plotting package `ggplot2` into your workspace as well.

```{r setup}
library(ggmice)
dat <- mice::boys
imp <- mice::mice(dat, method = "pmm", printFlag = FALSE)
```

# Incomplete data

The `ggmice` package contains functions to explore incomplete data.

## Missing data pattern

```{r pattern, echo=TRUE, eval=FALSE}
```

## Correlations between variables

The function `plot_corr()` can be used to investigate relations between variables when developing imputation models. Only one of the arguments (`dat`, the incomplete dataset) is required; all other arguments are optional.

```{r correlations}
plot_corr(dat)
plot_corr(dat, vrb = c("hgt", "wgt", "bmi"), label = TRUE, square = FALSE, diagonal = TRUE)
```

## Predictor matrix

The function `plot_pred()` displays `mice` predictor matrices. A predictor matrix is typically created using `mice::make.predictorMatrix()`, `mice::quickpred()`, or by using the default in `mice::mice()` and extracting the `predictorMatrix` from the resulting `mids` object. The `plot_pred()` function only requires the `pred` argument.

```{r predictormatrix}
pred <- mice::quickpred(dat)
plot_pred(pred)
plot_pred(pred, label = TRUE, square = FALSE)
```

## The `ggmice()` function

The `ggmice()` function processes incomplete data in such a way that it can be displayed with `ggplot2`. The missing values are displayed on the axes.

```{r incomplete}
ggmice(dat, ggplot2::aes(age, bmi)) +
ggplot2::geom_point()
ggmice(dat, ggplot2::aes(gen, bmi)) +
ggplot2::geom_point()
```

# Imputed data

The `ggmice` package contains functions to evaluate observed and imputed data.

## Algorithmic convergence

```{r convergence}
plot_chains(imp, "bmi")
```

## Box and whiskers plot

```{r bwplot}
bwplot(imp, "bmi")
```

## Stripplot

```{r stripplot}
stripplot(imp, "bmi")
```

## Densityplot

```{r densityplot}
densityplot(imp, "bmi")
```

## X-Y plot

```{r xyplot}
xyplot(imp, "age", "bmi")
```

## The `ggmice()` function

```{r imputed}
ggmice(imp, ggplot2::aes(log(wgt), hgt)) +
ggplot2::geom_point()
ggmice(imp, ggplot2::aes(x = .imp, y = bmi)) +
ggplot2::geom_jitter() +
ggplot2::geom_boxplot(size = 1, fill = "white", alpha = 0.5, outlier.shape = NA)
```

0 comments on commit edb9a18

Please sign in to comment.