Skip to content

Commit

Permalink
Merge pull request #326 from larmarange/stat_cross
Browse files Browse the repository at this point in the history
  • Loading branch information
schloerke committed May 30, 2020
2 parents 4a41d4c + 5ee8cdc commit 7d50265
Show file tree
Hide file tree
Showing 7 changed files with 607 additions and 0 deletions.
7 changes: 7 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ S3method(print,ggmatrix)
S3method(print,glyphplot)
S3method(print,legend_guide_box)
S3method(str,ggmatrix)
export(StatCross)
export(StatGGallyCount)
export(add_ggproto_to_ggmatrix)
export(add_ref_boxes)
Expand All @@ -26,6 +27,7 @@ export(ggally_blankDiag)
export(ggally_box)
export(ggally_box_no_facet)
export(ggally_cor)
export(ggally_cross)
export(ggally_cor_v1_5)
export(ggally_count)
export(ggally_countDiag)
Expand Down Expand Up @@ -53,6 +55,8 @@ export(ggally_ratio)
export(ggally_smooth)
export(ggally_smooth_lm)
export(ggally_smooth_loess)
export(ggally_table)
export(ggally_tableDiag)
export(ggally_statistic)
export(ggally_text)
export(ggcoef)
Expand All @@ -78,6 +82,7 @@ export(grab_legend)
export(is.glyphplot)
export(is_character_column)
export(is_horizontal)
export(keep_colour_if_in_x_or_y)
export(lowertriangle)
export(mapping_color_to_fill)
export(mapping_string)
Expand All @@ -94,6 +99,7 @@ export(range01)
export(rescale01)
export(rescale11)
export(scatmat)
export(stat_cross)
export(signif_stars)
export(stat_ggally_count)
export(uppertriangle)
Expand All @@ -106,6 +112,7 @@ import(RColorBrewer)
import(ggplot2)
import(plyr)
import(utils)
importFrom(broom,augment)
importFrom(grDevices,colorRampPalette)
importFrom(grDevices,gray.colors)
importFrom(grid,gpar)
Expand Down
20 changes: 20 additions & 0 deletions R/gg-plots.R
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,26 @@ mapping_swap_x_y <- function(mapping) {
}


#' Keep colour mapping only if same as x or y
#' @param mapping output of \code{ggplot2::\link[ggplot2]{aes}(...)}
#' @return Aes mapping with colour mapping kept only if equal to
#' mapping to x or to y.
#' @export
#' @examples
#' mapping <- aes(x = sex, y = age, colour = sex)
#' keep_colour_if_in_x_or_y(mapping)
#'
#' mapping <- aes(x = sex, y = age, colour = region)
#' keep_colour_if_in_x_or_y(mapping)
keep_colour_if_in_x_or_y <- function(mapping) {
if (
!is.null(mapping_string(mapping$colour)) &
mapping_string(mapping$colour) != mapping_string(mapping$x) &
mapping_string(mapping$colour) != mapping_string(mapping$y)
) mapping <- mapping[names(mapping) != "colour"]
mapping
}


#' Plots the Scatter Plot
#'
Expand Down
290 changes: 290 additions & 0 deletions R/stat_cross.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,290 @@
#' Compute cross-tabulation statisticics
#'
#' Computes statistics of a 2-dimensional matrix using \code{\link[broom]{augment.htest}}
#' from \pkg{broom}.
#'
#' @inheritParams ggplot2::stat_identity
#' @param geom Override the default connection between \code{\link[ggplot1]{geom_point}}
#' and \code{stat_prop}.
#' @param na.rm If \code{TRUE}, the default, missing values are removed with a warning.
#' If `TRUE`, missing values are silently removed.
#' @section Aesthetics:
#' \code{stat_prop} requires the \strong{x} and the \strong{y} aesthetics.
#' @section Computed variables:
#' \describe{
#' \item{observed}{number of observations in x,y}
#' \item{prop}{proportion of total}
#' \item{row.prop}{row proportion}
#' \item{col.prop}{column proportion}
#' \item{expected}{expected count under the null hypothesis}
#' \item{residuals}{Pearson's residual}
#' \item{stdres}{standardized residual}
#' }
#'
#' @importFrom broom augment
#' @export
#' @examples
#' d <- as.data.frame(Titanic)
#'
#' # by default, plot number of observations
#' ggplot(d) +
#' aes(x = Class, y = Survived, weight = Freq) +
#' stat_cross() +
#' scale_size_area()
#'
#' # custom shape and fill colour based on chi-squared residuals
#' ggplot(d) +
#' aes(x = Class, y = Survived, weight = Freq, fill = after_stat(stdres)) +
#' stat_cross(shape = 22) +
#' scale_fill_steps2(breaks = c(-4, -2, 2, 4), show.limits = TRUE)
#'
#' # plotting the number of observations as a table
#' ggplot(d) +
#' aes(
#' x = Class, y = Survived, weight = Freq,
#' label = scales::percent(after_stat(row.prop)), size = NULL
#' ) +
#' geom_text(stat = "cross")
#'
#' # Row proportions with standardized residuals
#' ggplot(d) +
#' aes(
#' x = Class, y = Survived, weight = Freq,
#' label = scales::percent(after_stat(row.prop)),
#' size = NULL, fill = after_stat(stdres)
#' ) +
#' stat_cross(shape = 22, size = 30) +
#' geom_text(stat = "cross") +
#' scale_fill_steps2(breaks = c(-3, -2, 3, 4), show.limits = TRUE) +
#' facet_grid(Sex ~ .) +
#' labs(fill = "Standardized residuals") +
#' theme_minimal()
#'
#' # can work with continuous or character variables
#' data(tips, package = "reshape")
#' ggplot(tips) +
#' aes(x = tip, y = as.character(day)) +
#' stat_cross(alpha = .1, color = "blue") +
#' scale_size_area(max_size = 12)
#'
stat_cross <- function(mapping = NULL, data = NULL,
geom = "point", position = "identity",
...,
na.rm = TRUE,
show.legend = NA,
inherit.aes = TRUE) {

params <- list(
na.rm = na.rm,
...
)

layer(
data = data,
mapping = mapping,
stat = StatCross,
geom = geom,
position = position,
show.legend = show.legend,
inherit.aes = inherit.aes,
params = params
)
}

#' @rdname stat_cross
#' @format NULL
#' @usage NULL
#' @export
StatCross <- ggproto("StatCross", Stat,
required_aes = c("x", "y"),
default_aes = aes(
weight = 1
),

setup_params = function(data, params) {
params
},

extra_params = c("na.rm"),

compute_panel = function(self, data, scales) {
if (is.null(data$weight))
data$weight <- rep(1, nrow(data))

# compute cross statistics
panel <- broom::augment(chisq.test(xtabs(weight ~ y + x, data = data)))

names(panel)[which(names(panel) == ".observed")] <- "observed"
names(panel)[which(names(panel) == ".prop")] <- "prop"
names(panel)[which(names(panel) == ".row.prop")] <- "row.prop"
names(panel)[which(names(panel) == ".col.prop")] <- "col.prop"
names(panel)[which(names(panel) == ".expected")] <- "expected"
names(panel)[which(names(panel) == ".residuals")] <- "residuals"
names(panel)[which(names(panel) == ".stdres")] <- "stdres"

data <- merge(data, panel, by = c("x", "y"), all.x = TRUE)
data
}
)


#' Plots the number of observations
#'
#' Plot the number of observations by using squares points
#' with proportional areas. Could be filled according to chi-squared
#' statistics computed by \code{\link{stat_cross}}. Labels could also
#' be added (see examples).
#'
#' @param data data set using
#' @param mapping aesthetics being used
#' @param ... other arguments passed to \code{\link[ggplot2]{geom_point}(...)}
#' @param geom_text_args other arguments passed to \code{\link[ggplot2]{geom_text}(...)}
#' @author Joseph Larmarange \email{joseph@@larmarange.net}
#' @keywords hplot
#' @export
#' @examples
#' data(tips, package = "reshape")
#' ggally_cross(tips, mapping = aes(x = smoker, y = sex))
#'
#' # Custom max size
#' ggally_cross(tips, mapping = aes(x = smoker, y = sex)) +
#' scale_size_area(max_size = 40)
#'
#' # Custom fill
#' ggally_cross(tips, mapping = aes(x = smoker, y = sex), fill = "red")
#'
#' # Custom shape
#' ggally_cross(tips, mapping = aes(x = smoker, y = sex), shape = 21)
#'
#' # Fill squares according to standardized residuals
#' d <- as.data.frame(Titanic)
#' ggally_cross(
#' d,
#' mapping = aes(x = Class, y = Survived, weight = Freq, fill = after_stat(stdres))
#' ) +
#' scale_fill_steps2(breaks = c(-3, -2, 2, 3), show.limits = TRUE)
#'
#' # Add labels
#' ggally_cross(
#' tips,
#' mapping = aes(
#' x = smoker, y = sex, colour = smoker,
#' label = scales::percent(after_stat(prop))
#' )
#' )
#'
#' # Customize labels' appearance and same size for all squares
#' ggally_cross(
#' tips,
#' mapping = aes(
#' x = smoker, y = sex,
#' size = NULL, # do not map size to a variable
#' label = scales::percent(after_stat(prop))
#' ),
#' size = 40, # fix value for points size
#' fill = "darkblue",
#' geom_text_args = list(colour = "white", fontface = "bold", size = 6)
#' )
ggally_cross <- function(data, mapping, ..., geom_text_args = NULL){
mapping <- keep_colour_if_in_x_or_y(mapping)
mapping <- mapping_color_to_fill(mapping)

args <- list(...)
# default values for geom_point
if (!"size" %in% names(mapping))
mapping$size <- aes_string(size = "after_stat(observed)")$size
if (is.null(mapping$shape) & is.null(args$shape))
args$shape <- 22
if (is.null(mapping$fill) & is.null(args$fill))
args$fill <- "white"

p <- ggplot(data = data, mapping) +
do.call(stat_cross, args) +
scale_size_area(max_size = 20)

# default values for geom_text
geom_text_args$stat <- "cross"
if (is.null(geom_text_args$mapping))
geom_text_args$mapping <- aes(colour = NULL, size = NULL)
if (is.null(geom_text_args$show.legend))
geom_text_args$show.legend <- FALSE

if(!is.null(mapping$label))
p <- p +
do.call(geom_text, geom_text_args)

p
}

#' Display a table of the number of observations
#'
#' Plot the number of observations as a table. Other statistics computed
#' by \code{\link{stat_cross}} could be used (see examples).
#'
#' @param data data set using
#' @param mapping aesthetics being used
#' @param ... other arguments passed to \code{\link[ggplot2]{geom_text}(...)}
#' @param geom_tile_args other arguments passed to \code{\link[ggplot2]{geom_tile}(...)}
#' @note The \strong{colour} aesthetic is taken into account only if equal to
#' \strong{x} or \strong{y}.
#' @author Joseph Larmarange \email{joseph@@larmarange.net}
#' @keywords hplot
#' @export
#' @examples
#' data(tips, package = "reshape")
#' ggally_table(tips, mapping = aes(x = smoker, y = sex))
#' ggally_table(tips, mapping = aes(x = smoker, y = sex, colour = smoker))
#' ggally_table(tips, mapping = aes(x = smoker, y = sex, colour = day))
#' ggally_tableDiag(tips, mapping = aes(x = smoker))
#'
#' # Custom label size and color
#' ggally_table(tips, mapping = aes(x = smoker, y = sex), size = 16, color = "red")
#'
#' # Display column proportions
#' ggally_table(tips, mapping = aes(x = day, y = sex, label = scales::percent(after_stat(col.prop))))
#'
#' # Draw table cells
#' ggally_table(
#' tips,
#' mapping = aes(x = smoker, y = sex),
#' geom_tile_args = list(colour = "black", fill = "white")
#' )
#'
#' # Use standardized residuals to fill table cells
#' ggally_table(
#' as.data.frame(Titanic),
#' mapping = aes(
#' x = Class, y = Survived, weight = Freq,
#' fill = after_stat(stdres),
#' label = scales::percent(after_stat(col.prop), accuracy = .1)
#' ),
#' geom_tile_args = list(colour = "black")
#' ) +
#' scale_fill_steps2(breaks = c(-3, -2, 2, 3), show.limits = TRUE)
ggally_table <- function(data, mapping, ..., geom_tile_args = NULL){
mapping <- keep_colour_if_in_x_or_y(mapping)

# default values geom_text
if (!"label" %in% names(mapping))
mapping$label <- aes_string(label = "after_stat(observed)")$label
geom_text_args <- list(...)
geom_text_args$stat <- "cross"

# default values geom_tile
geom_tile_args$stat <- "cross"
geom_tile_args$mapping <- aes(colour = NULL)$colour
if (is.null(geom_tile_args$colour))
geom_tile_args$colour <- "transparent"
if (is.null(mapping$fill) & is.null(geom_tile_args$fill))
geom_tile_args$fill <- "transparent"

ggplot(data = data, mapping) +
do.call(geom_tile, geom_tile_args) +
do.call(geom_text, geom_text_args)
}

#' @export
ggally_tableDiag <- function(data, mapping, ..., geom_tile_args = NULL) {
mapping$y <- mapping$x
ggally_table(data = data, mapping = mapping, ..., geom_tile_args = geom_tile_args)
}

0 comments on commit 7d50265

Please sign in to comment.