Skip to content

Commit

Permalink
Add fluxplot function (fix amices#16), add separate vignette for mice…
Browse files Browse the repository at this point in the history
… plots (fix amices#17), update vignette (amices#9) and site (amices#12)
  • Loading branch information
hanneoberman committed Mar 8, 2022
1 parent b1fb93a commit e578d98
Show file tree
Hide file tree
Showing 11 changed files with 206 additions and 89 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Expand Up @@ -5,6 +5,7 @@ export(bwplot)
export(densityplot)
export(ggmice)
export(plot_corr)
export(plot_flux)
export(plot_pattern)
export(plot_pred)
export(plot_trace)
Expand Down
9 changes: 5 additions & 4 deletions R/corr.R
Expand Up @@ -29,19 +29,20 @@ plot_corr <- function(dat, vrb = "all", label = FALSE, square = TRUE, diagonal =
corrs[corrs$vrb == corrs$prd, "corr"] <- NA
}
gg <- ggplot2::ggplot(corrs, ggplot2::aes(x = .data$prd, y = .data$vrb, label = .data$corr, fill = .data$corr)) +
ggplot2::geom_tile(color = "black") +
ggplot2::geom_tile(color = "black", alpha = 0.6) +
ggplot2::scale_x_discrete(limits = vrb, position = "top") +
ggplot2::scale_y_discrete(limits = rev(vrb)) +
ggplot2::scale_fill_gradient2(low = "deepskyblue", mid = "lightyellow", high = "orangered", na.value = "white", limits = c(-1, 1)) +
ggplot2::scale_fill_gradient2(low = ggplot2::alpha("deepskyblue", 0.6), mid = "lightyellow", high = ggplot2::alpha("orangered", 0.6), na.value = "white", limits = c(-1, 1)) +
ggplot2::labs(
x = "Imputation model predictor",
y = "Variable to impute",
fill = "Correlation*",
fill = "Correlation*
",
caption = "*pairwise complete observations"
) +
theme_minimice()
if (label) {
gg <- gg + ggplot2::geom_text(color = "black", show.legend = FALSE)
gg <- gg + ggplot2::geom_text(color = "black", show.legend = FALSE, na.rm = TRUE)
}
if (square) {
gg <- gg + ggplot2::coord_fixed()
Expand Down
66 changes: 48 additions & 18 deletions R/flux.R
@@ -1,18 +1,48 @@
# plot_flux <- function(dat) {
# # escape function if dataset is complete
# # if(!any(is.na(dat))){return(plot_a_mouse())}
# # plot in and outflux
# flx <- mice::flux(dat) %>% cbind(variable = rownames(.))
# gg <- flx %>%
# ggplot2::ggplot(ggplot2::aes(x = influx,
# y = outflux,
# label = variable)) +
# ggplot2::geom_abline(intercept = 1,
# slope = -1,
# linetype = "dashed") +
# ggplot2::geom_text(position = ggplot2::position_jitter(width = 0.01, height = 0.01)) +
# ggplot2::lims(x = c(-0.01, 1.01), y = c(-0.01, 1.01)) +
# ggplot2::theme_classic()
# # output
# return(gg)
# }
#' Plot the influx and outflux of each variable in an incomplete dataset
#'
#' Computes influx and outflux with [mice::flux()] and draws one marker per
#' variable (influx on the x-axis, outflux on the y-axis), together with a
#' dashed reference line running from (0, 1) to (1, 0).
#'
#' @param dat An incomplete dataset of class `data.frame`, `tibble`, or `matrix`.
#' @param label Logical. If `TRUE` (the default), each variable is drawn as its
#'   name in black text. If `FALSE`, each variable is drawn as an open circle
#'   whose color identifies the variable in the legend.
#' @param caption Logical. If `TRUE` (the default), the axis titles carry
#'   asterisks and a caption explaining influx and outflux is added. If
#'   `FALSE`, plain axis titles are used and no caption is set.
#'
#' @return An object of class `ggplot`.
#'
#' @examples
#' plot_flux(mice::nhanes)
#' @export
plot_flux <- function(dat, label = TRUE, caption = TRUE) {
  # escape function if dataset is complete
  # if(!any(is.na(dat))){return(plot_a_mouse())}

  # influx and outflux per variable; row names hold the variable names
  flux_stats <- mice::flux(dat)[, c("influx", "outflux")]

  # the y aesthetic is stored 0.025 below the outflux; the geoms added further
  # down shift it back up by the same amount via `undo_nudge`
  plot_data <- data.frame(
    vrb = rownames(flux_stats),
    flux_stats,
    outflux_nudge = flux_stats$outflux - 0.025
  )
  undo_nudge <- ggplot2::position_nudge(y = 0.025)

  flux_mapping <- ggplot2::aes(
    x = .data$influx,
    y = .data$outflux_nudge,
    color = .data$vrb,
    label = .data$vrb
  )

  # base plot: reference line, fixed axis limits, no clipping at the panel edge
  gg <- ggplot2::ggplot(plot_data, flux_mapping) +
    ggplot2::geom_abline(intercept = 1, slope = -1, linetype = "dashed") +
    ggplot2::lims(x = c(-0.05, 1.05), y = c(-0.05, 1.05)) +
    ggplot2::coord_cartesian(clip = "off") +
    theme_mice()

  # variable markers: text labels, or colored open circles with a legend
  if (label) {
    gg <- gg +
      ggplot2::geom_text(color = "black", position = undo_nudge)
  } else {
    gg <- gg +
      ggplot2::geom_point(shape = 1, position = undo_nudge) +
      ggplot2::labs(color = "")
  }

  # axis titles, with or without the explanatory caption
  axis_labels <- if (caption) {
    ggplot2::labs(
      x = "Influx*",
      y = "Outflux**",
      caption = "*connection of a variable's missingness indicator with observed data on other variables\n **connection of a variable's observed data with missing data on other variables"
    )
  } else {
    ggplot2::labs(x = "Influx", y = "Outflux")
  }

  gg + axis_labels
}
4 changes: 2 additions & 2 deletions R/pred.R
Expand Up @@ -24,10 +24,10 @@ plot_pred <- function(pred, label = FALSE, square = TRUE, rotate = FALSE) {
ind = matrix(pred, nrow = p * p, byrow = TRUE)
)
gg <- ggplot2::ggplot(long, ggplot2::aes(x = .data$prd, y = .data$vrb, label = .data$ind, fill = ifelse(.data$ind == 0, "no", "yes"))) +
ggplot2::geom_tile(color = "black", alpha = 1) +
ggplot2::geom_tile(color = "black", alpha = 0.6) +
ggplot2::scale_x_discrete(limits = vrbs, position = "top") +
ggplot2::scale_y_discrete(limits = rev(vrbs)) +
ggplot2::scale_fill_manual(values = c("yes" = "grey60", "no" = "white")) + ## 006CC2B3
ggplot2::scale_fill_manual(values = c("yes" = "grey75", "no" = "white")) + ## 006CC2B3
ggplot2::labs(
x = "Imputation model predictor",
y = "Variable to impute",
Expand Down
5 changes: 4 additions & 1 deletion R/theme.R
Expand Up @@ -24,7 +24,10 @@ theme_minimice <- function() {
legend.position = "bottom",
legend.justification = "right",
strip.placement = "outside",
panel.grid.minor = ggplot2::element_blank()
panel.grid.minor = ggplot2::element_blank(),
panel.grid.major = ggplot2::element_line(colour = "grey95")#,
# axis.ticks = ggplot2::element_line(size = 0,5),
# axis.text = ggplot2::element_text(margin = 0)
)
}

Expand Down
7 changes: 5 additions & 2 deletions README.Rmd
Expand Up @@ -24,11 +24,11 @@ knitr::opts_chunk$set(

## Plotting package for incomplete and imputed data

`ggmice` is an `R` package which enhances the imputation package `mice` with `ggplot2` visualizations. See the `ggmice` vignette for an overview of functionalities.
The `ggmice` package enhances the imputation package `mice` with `ggplot2` visualizations. See the [vignette](https://amices.org/ggmice/articles/ggmice.html) for an overview of `ggmice`'s functionalities.

## Installation

You can install the development version of `ggmice` from [GitHub](https://github.com/) with:
You can install the development version of `ggmice` from [GitHub](https://github.com/amices) with:

``` r
# install.packages("devtools")
Expand All @@ -40,9 +40,12 @@ devtools::install_github("amices/ggmice")
Visualize missing data in an incomplete dataset, or evaluate imputed data against the observed data.

```{r example}
# load the package and some data
library(ggmice)
dat <- mice::boys
# visualize the incomplete data
ggmice(dat, ggplot2::aes(age, bmi)) + ggplot2::geom_point()
# impute the data and visualize again
imp <- mice::mice(dat, m = 1, printFlag = FALSE)
ggmice(imp, ggplot2::aes(age, bmi)) + ggplot2::geom_point()
```
Expand Down
12 changes: 8 additions & 4 deletions README.md
Expand Up @@ -15,14 +15,15 @@ experimental](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](h

## Plotting package for incomplete and imputed data

`ggmice` is an `R` package which enhances the imputation package `mice`
with `ggplot2` visualizations. See the `ggmice` vignette for an overview
of functionalities.
The `ggmice` package enhances the imputation package `mice` with `ggplot2`
visualizations. See the
[vignette](https://amices.org/ggmice/articles/ggmice.html) for an
overview of `ggmice`’s functionalities.

## Installation

You can install the development version of `ggmice` from
[GitHub](https://github.com/) with:
[GitHub](https://github.com/amices) with:

``` r
# install.packages("devtools")
Expand All @@ -35,14 +36,17 @@ Visualize missing data in an incomplete dataset, or evaluate imputed
data against the observed data.

``` r
# load the package and some data
library(ggmice)
dat <- mice::boys
# visualize the incomplete data
ggmice(dat, ggplot2::aes(age, bmi)) + ggplot2::geom_point()
```

<img src="man/figures/README-example-1.png" width="100%" />

``` r
# impute the data and visualize again
imp <- mice::mice(dat, m = 1, printFlag = FALSE)
ggmice(imp, ggplot2::aes(age, bmi)) + ggplot2::geom_point()
```
Expand Down
Binary file modified man/figures/README-example-2.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
24 changes: 24 additions & 0 deletions man/plot_flux.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

89 changes: 54 additions & 35 deletions vignettes/ggmice.Rmd
Expand Up @@ -20,9 +20,12 @@ knitr::opts_chunk$set(

# Overview

The package `ggmice` bundles a set of plotting functions for the imputation package `mice`. These plotting functions adhere to the 'grammar of graphics' style, popularized by the `ggplot2` package. With that, `ggmice` enhances imputation workflows and provides plotting objects that are easy to extend and manipulate.
The `ggmice` package provides plotting functions for the evaluation of incomplete data, `mice` imputation models, and multiply imputed data sets (`mice::mids`). The functions in `ggmice` adhere to the 'grammar of graphics' philosophy, popularized by the `ggplot2` package. With that, `ggmice` enhances imputation workflows and provides plotting objects that are easily extended and manipulated by each individual 'imputer'.

The main `mice` package contains several `lattice` style plotting functions, `ggmice` provides `ggplot2` versions of these functions and more. The core function in the `ggmice` package, the `ggmice()` function, can be applied to obtain `ggplot2` objects for both incomplete *and* imputed data. This vignette provides an overview of the main functions included in `ggmice`.
This vignette gives an overview of the core plotting functions in `ggmice`. Experienced `mice` users may already be familiar with the `lattice` style plotting functions in `mice`. These 'old friends' such as `mice::bwplot()` can be re-created with `ggmice`; see [this](https://amices.org/ggmice/articles/old_friends.html) vignette for advice.

<!-- Loading `ggmice` after `mice` masks these functions and provides suggestions for `ggplot2` equivalents these 'old friends'. -->
<!-- The `mice` package contains several plotting functions that may guide 'imputers' in their workflow. `lattice` style plotting functions, `ggmice` provides `ggplot2` versions of these functions and more. The core function in the `ggmice` package, the `ggmice()` function, can be applied to obtain `ggplot2` objects for both incomplete *and* imputed data. This vignette provides an overview of the main functions included in `ggmice`. -->

# Set-up

Expand All @@ -36,90 +39,106 @@ devtools::install_github("amices/ggmice")
It is recommended to load the imputation package `mice` and the plotting package `ggplot2` into your workspace as well.

```{r setup}
# load packages
library(ggmice)
# load incomplete dataset
dat <- mice::boys
# generate imputations
imp <- mice::mice(dat, method = "pmm", printFlag = FALSE)
```

# Incomplete data

The `ggmice` package contains functions to explore incomplete data.

## Missing data pattern
### Missing data pattern

The `plot_pattern()` function displays the missing data pattern in an incomplete dataset. The argument `dat` (the incomplete dataset) is required, the argument `square` is optional and determines whether the missing data pattern has square or rectangular tiles, and the optional argument `rotate` changes the angle of the variable names 90 degrees if requested.

```{r pattern}
# create missing data pattern plot
plot_pattern(dat)
# specify optional arguments
plot_pattern(dat, square = TRUE, rotate = TRUE)
```

### Influx and outflux

```{r pattern, echo=TRUE, eval=FALSE}
The `plot_flux()` function produces an influx-outflux plot. The influx of a variable quantifies how well its missing data connect to the observed data on other variables. The outflux of a variable quantifies how well its observed data connect to the missing data on other variables. In general, higher influx and outflux values are preferred when building imputation models. The plotting function requires an incomplete dataset (argument `dat`), and takes optional arguments to adjust the legend and axis labels.

```{r flux}
# create influx-outflux plot
plot_flux(dat)
# specify optional arguments
plot_flux(dat, label = FALSE, caption = FALSE)
```

## Correlations between variables

### Correlations between variables

The function `plot_corr()` can be used to investigate relations between variables, for the development of imputation models. Only one of the arguments (`dat`, the incomplete dataset) is required, all other arguments are optional.

```{r correlations}
# create correlation plot
plot_corr(dat)
# specify optional arguments
plot_corr(dat, vrb = c("hgt", "wgt", "bmi"), label = TRUE, square = FALSE, diagonal = TRUE)
```

## Predictor matrix
### Predictor matrix

The function `plot_pred()` displays `mice` predictor matrices. A predictor matrix is typically created using `mice::make.predictorMatrix()`, `mice::quickpred()`, or by using the default in `mice::mice()` and extracting the `predictorMatrix` from the resulting `mids` object. The `plot_pred()` function only requires the `pred` argument.
The function `plot_pred()` displays `mice` predictor matrices. A predictor matrix is typically created using `mice::make.predictorMatrix()`, `mice::quickpred()`, or by using the default in `mice::mice()` and extracting the `predictorMatrix` from the resulting `mids` object. The `plot_pred()` function only requires the `pred` argument, but other arguments can be provided.

```{r predictormatrix}
# create predictor matrix
pred <- mice::quickpred(dat)
# create predictor matrix plot
plot_pred(pred)
# specify optional arguments
plot_pred(pred, label = TRUE, square = FALSE)
```

## The `ggmice()` function
### The `ggmice()` function

The `ggmice` function processes incomplete data in such a way that it can be displayed with `ggplot2`. The missing values are displayed on the axes.
The `ggmice` function processes incomplete data in such a way that it can be displayed with `ggplot2`. The missing values are displayed on the axes (i.e., a missing value for the x-variable is plotted on top of the y-axis, and vice versa). Note that, in contrast to the `ggplot()` function, `ggmice()` *requires* an aesthetic mapping (argument `mapping`).

```{r incomplete}
# create scatter plot with continuous variables
ggmice(dat, ggplot2::aes(age, bmi)) +
ggplot2::geom_point()
# create scatter plot with a categorical variable
ggmice(dat, ggplot2::aes(gen, bmi)) +
ggplot2::geom_point()
```

# Imputed data

The `ggmice` package contains functions to evaluate observed and imputed data.
The `ggmice` package contains two functions to evaluate observed and imputed data.

### Algorithmic convergence

## Algorithmic convergence
The function `plot_trace()` plots the trace lines of the MICE algorithm for convergence evaluation. The only required argument is `imp` (to supply a `mice::mids` object). The optional argument `vrb` defaults to `"all"`, which would display traceplots for all variables.

```{r convergence}
# create traceplot for one variable
plot_trace(imp, "bmi")
```

## Box and whiskers plot

```{r bwplot}
bwplot(imp, "bmi")
```

## Stripplot
### The `ggmice()` function

```{r stripplot}
stripplot(imp, "bmi")
```

## Densityplot

```{r densityplot}
densityplot(imp, "bmi")
```

## X-Y plot

```{r xyplot}
xyplot(imp, "age", "bmi")
```

## The `ggmice()` function
The `ggmice` function is versatile. It produces a `ggplot` object that can be extended to mimic every type of plot for observed and imputed data in `mice`; see [this](https://amices.org/ggmice/articles/old_friends.html) vignette for advice. Below are some examples of plots produced with `ggmice()`. Note that, in contrast to the `ggplot()` function, `ggmice()` *requires* an aesthetic mapping (argument `mapping`).

```{r imputed}
# create scatter plot with continuous variables
ggmice(imp, ggplot2::aes(age, bmi)) +
ggplot2::geom_point()
# create scatter plot with a categorical variable
ggmice(imp, ggplot2::aes(gen, bmi)) +
ggplot2::geom_point()
# create scatter plot with a transformed variable
ggmice(imp, ggplot2::aes(log(wgt), hgt)) +
ggplot2::geom_point()
# create stripplot with boxplot overlay
ggmice(imp, ggplot2::aes(x = .imp, y = bmi)) +
ggplot2::geom_jitter() +
ggplot2::geom_boxplot(size = 1, fill = "white", alpha = 0.5, outlier.shape = NA)
Expand Down

0 comments on commit e578d98

Please sign in to comment.