Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,4 @@ docs/
pkgdown/
_pkgdown.yaml
^CRAN-SUBMISSION$
dev/
6 changes: 3 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Package: ggbiplot
Type: Package
Title: A Grammar of Graphics Implementation of Biplots
Version: 0.6.3
Date: 2024-04-24
Version: 0.6.5
Date: 2025-09-16
Authors@R: c(
person(c("Vincent", "Q."), "Vu", , "vqv@stat.osu.edu", role = c("aut"),
comment = c(ORCID = "0000-0002-4689-0497")),
Expand All @@ -18,7 +18,7 @@ Description: A 'ggplot2' based implementation of biplots, giving a representatio
biplot and scree plot methods which can be used with the results of prcomp(), princomp(),
FactoMineR::PCA(), ade4::dudi.pca() or MASS::lda() and can be customized using 'ggplot2' techniques.
Depends:
R (>= 3.5.0),
R (>= 4.1.0),
ggplot2
Imports:
scales
Expand Down
13 changes: 13 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,16 @@
## Version 0.6.5

- Now Depends R (>= 4.1.0) to handle |>
- Added lda() objects to those handled by reflect()
- Illustrate reflection in ggbiplot examples

## Version 0.6.4

- Documented solution to 'scale_color_discrete() produces two legends' #2
- Fix glitch with axis labels
- `ggbiplot` gains `geom.ind` and `geom.var` arguments for more flexlible handling of the geometries used to display the
observation points and variable labels.

## Version 0.6.3

- Fix axis label spacing
Expand Down
135 changes: 80 additions & 55 deletions R/ggbiplot.r
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,21 @@
#' values is assigned, contains the so-called \emph{standard coordinates} and have sum of squared
#' values equal to 1.0.
#'
#' \bold{Scales and legend}
#'
#' When the `groups` argument is not \code{NULL}, the function uses that value to set the aesthetics for `color`, `fill` and `shape`.
#' If you override the defaults using \code{scale_color_discrete}, etc., you may find that duplicate legends are produced.
#' To avoid this, you need to have the same name for aesthetics to be merged in the legend, for example,
#' \preformatted{
#' scale_fill_discrete(name = 'Species')
#' scale_color_discrete(name = 'Species')
#' }
#' or,
#' \preformatted{
#' labs(fill = "Species", color = "Species")
#' }
#'
#'
#' @param pcobj an object returned by \code{\link[stats]{prcomp}}, \code{\link[stats]{princomp}},
#' \code{\link[FactoMineR]{PCA}}, \code{\link[ade4]{dudi.pca}}, or \code{\link[MASS]{lda}}
#' @param choices Which components to plot? An integer vector of length 2.
Expand All @@ -97,6 +112,14 @@
#' @param groups Optional factor variable indicating the groups that the observations belong to.
#' If provided the points will be colored according to groups and this allows data ellipses also
#' to be drawn when \code{ellipse = TRUE}.
#' @param geom.ind a text specifying the geometry to be used for the observations. Allowed
#' values are among \code{c("point", "text")}. Use
#' \code{"point"} (to show only points); \code{"text"} to show only labels;
#' \code{c("point", "text")} to show both.
#' @param geom.var a text specifying the geometry to be used for the variables. Allowed
#' values are among \code{c("arrow", "text")}. Use
#' \code{"arrow"} (to show only vectors); \code{"text"} to show only labels;
#' \code{c("arrow", "text")} to show both.
#' @param point.size Size of observation points.
#' @param ellipse Logical; draw a normal data ellipse for each group?
#' @param ellipse.prob Coverage size of the data ellipse in Normal probability
Expand Down Expand Up @@ -131,7 +154,7 @@
#' \code{\link[stats]{biplot}} for the original stats package version;
#' \code{\link[factoextra]{fviz_pca_biplot}} for the factoextra package version.
#'
#' @author Vincent Q. Vu.
#' @author Vincent Q. Vu., Michael Friendly
#' @references
#' Gabriel, K. R. (1971). The biplot graphical display of matrices with application to principal component analysis.
#' \emph{Biometrika}, \bold{58}, 453–467. \doi{10.2307/2334381}.
Expand All @@ -157,6 +180,22 @@
#' varname.size = 4,
#' groups = wine.class,
#' ellipse = TRUE, circle = TRUE)
#'
#' # Easier interpretation if the axes are reflected
#' wine.pca <- reflect(wine.pca)
#' # Use direct labels rather than a legend
#' means <- aggregate(cbind(PC1, PC2) ~ wine.class,
#' data = wine.pca$x, FUN = mean)
#'
#' ggbiplot(wine.pca,
#' obs.scale = 1, var.scale = 1,
#' groups = wine.class,
#' varname.size = 4,
#' ellipse = TRUE,
#' circle = TRUE) +
#' geom_label(data = means, aes(x=PC1, y=PC2, label = wine.class)) +
#' theme(legend.position = 'none')
#'
#'
#' data(iris)
#' iris.pca <- prcomp (~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width,
Expand All @@ -181,6 +220,8 @@ ggbiplot <- function(pcobj,
var.scale = scale,
var.factor = 1, # MF
groups = NULL,
geom.ind = "point",
geom.var = c("arrow", "text"),
point.size = 1.5,
ellipse = FALSE,
ellipse.prob = 0.68,
Expand Down Expand Up @@ -276,9 +317,9 @@ ggbiplot <- function(pcobj,

# Change the title labels for the axes
if(obs.scale == 0) {
u.axis.labs <- paste('standardized ', axis.title, choices, sep='')
u.axis.labs <- paste0('standardized ', axis.title, choices)
} else {
u.axis.labs <- paste(axis.title, choices, sep='')
u.axis.labs <- paste0(axis.title, choices)
}

# Append the proportion of explained variance to the axis labels
Expand All @@ -297,11 +338,11 @@ ggbiplot <- function(pcobj,
}

# Variable Names
if(varname.abbrev) {
df.v$varname <- abbreviate(rownames(v))
} else {
df.v$varname <- rownames(v)
vn <- rownames(v)
if(is.logical(varname.abbrev) & isTRUE(varname.abbrev)) {
vn <- abbreviate(vn)
}
df.v$varname <- vn

# Variables for text label placement
df.v$angle <- with(df.v, (180/pi) * atan(yvar / xvar))
Expand All @@ -314,18 +355,39 @@ ggbiplot <- function(pcobj,
coord_equal(clip = clip)

# Draw either labels or points
if(!is.null(df.u$labels)) {
# if(!is.null(df.u$labels)) {
# if(!is.null(df.u$groups)) {
# g <- g + geom_text(aes(label = labels, color = groups),
# size = labels.size)
# } else {
# g <- g + geom_text(aes(label = labels), size = labels.size)
# }
# } else {
# if(!is.null(df.u$groups)) {
# g <- g + geom_point(aes(color = groups), alpha = alpha, size = point.size)
# } else {
# g <- g + geom_point(alpha = alpha, size = point.size)
# }
# }

# Allow to use points and/or labels
if("point" %in% geom.ind) {
if(!is.null(df.u$groups)) {
g <- g + geom_text(aes(label = labels, color = groups),
size = labels.size)
g <- g + geom_point(aes(color = groups, shape = groups),
alpha = alpha, size = point.size)
} else {
g <- g + geom_text(aes(label = labels), size = labels.size)
g <- g + geom_point(alpha = alpha, size = point.size)
}
} else {
}
if("text" %in% geom.ind) {
if(!is.null(df.u$groups)) {
g <- g + geom_point(aes(color = groups), alpha = alpha, size = point.size)
# g <- g + geom_text(aes(label = labels, color = groups),
# size = labels.size, vjust = -0.5)
g <- g + geom_text(aes(label = labels),
size = labels.size, vjust = -0.5)
} else {
g <- g + geom_point(alpha = alpha, size = point.size)
g <- g + geom_text(aes(label = labels),
size = labels.size, vjust = -0.5)
}
}

Expand All @@ -341,60 +403,23 @@ ggbiplot <- function(pcobj,
}

# Draw directions
if("arrow" %in% geom.var) {
arrow_style <- arrow(length = unit(1/2, 'picas'), type="closed", angle=15)
g <- g +
geom_segment(data = df.v,
aes(x = 0, y = 0, xend = xvar, yend = yvar),
arrow = arrow_style,
color = varname.color,
linewidth = 1.4) # MR: was 1.2
}
}

# Overlay a concentration ellipse if there are groups
if(!is.null(df.u$groups) && ellipse) {
theta <- c(seq(-pi, pi, length = 50), seq(pi, -pi, length = 50))
circle <- cbind(cos(theta), sin(theta))

# ell <- ddply(df.u, 'groups', function(x) {
# if(nrow(x) <= 2) {
# return(NULL)
# }
# sigma <- var(cbind(x$xvar, x$yvar))
# mu <- c(mean(x$xvar), mean(x$yvar))
# ed <- sqrt(qchisq(ellipse.prob, df = 2))
# data.frame(sweep(circle %*% chol(sigma) * ed, 2, mu, FUN = '+'),
# groups = x$groups[1])
# })
# names(ell)[1:2] <- c('xvar', 'yvar')

# ell <-
# df.u |>
# group_by(groups) |>
# filter(n() > 2) |>
# summarize(
# sigma = list(var(cbind(xvar, yvar))),
# mu = list(c(mean(xvar), mean(yvar))),
# ed = sqrt(qchisq(ellipse.prob, df = 2)),
# circle_chol = list(circle %*% chol(sigma[[1]]) * ed),
# ell = list(sweep(circle_chol[[1]], 2, mu[[1]], FUN = "+")),
# xvar = map(ell, ~.x[,1]),
# yvar = map(ell, ~.x[,2]),
# .groups = "drop"
# ) |>
# dplyr::select(xvar, yvar, groups) |>
# tidyr::unnest(c(xvar, yvar))

# g <- g + geom_path(data = ell,
# aes(color = groups,
# group = groups),
# linewidth = ellipse.linewidth)
# g <- g + geom_polygon(data = ell,
# aes(color = groups,
# fill = groups
# # group = groups
# ),
# alpha = 0.4, # MF: why doesn't this have any effect?
# linewidth = ellipse.linewidth)

# Overlay a concentration ellipse if there are groups
geom <- if(isTRUE(ellipse.fill)) "polygon" else "path"
Expand All @@ -418,12 +443,12 @@ ggbiplot <- function(pcobj,
}

# Label the variable axes
if(var.axes) {
if(var.axes & "text" %in% geom.var) {
g <- g +
geom_text(data = df.v,
aes(label = varname, x = xvar, y = yvar,
angle = angle, hjust = hjust),
color = varname.color, size = varname.size)
color = varname.color, size = varname.size, lineheight = 0.75)
}
# Change the name of the legend for groups
# if(!is.null(groups)) {
Expand Down
16 changes: 11 additions & 5 deletions R/reflect.R
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,13 @@
#'
#' crime.pca <- reflect(crime.pca) # reflect columns 1:2
#' biplot(crime.pca)
#'
#' iris.lda <- MASS::lda(Species ~ ., data=iris)
#' #reflect the first dimension
#' iris.lda1 <- reflect(iris.lda, columns = 1)
#' # compare predicted scores
#' predict(iris.lda)$x |> head()
#' predict(iris.lda1)$x |> head()

reflect <- function(pcobj, columns = 1:2){

Expand All @@ -60,11 +67,10 @@ reflect <- function(pcobj, columns = 1:2){
if ("quanti.sup" %in% names(pcobj)) pcobj$quanti.sup$coord[, columns] <- -1 * pcobj$quanti.sup$coord[, columns]
}
else if(inherits(pcobj, "lda")) {
warning("Can't reflect an 'lda' object") # Why not???
# u <- predict(pcobj)$x
# check(u, columns)
# pcobj$scaling[, columns] <- -1 * pcobj$scaling[, columns]
# pcobj$x[, columns] <- -1 * pcobj$x[, columns]
# lda objects don't have a scores (x) component. They come from predict()
u <- predict(pcobj)$x
check(u, columns)
pcobj$scaling[, columns] <- -1 * pcobj$scaling[, columns]
}
else {
stop('Expected a object of class "prcomp", "princomp", "PCA", or "lda"')
Expand Down
Loading