From 9ee58612c9b0f41fa8876c6a2e248f48632a3be3 Mon Sep 17 00:00:00 2001 From: andrie Date: Sat, 23 Jun 2012 19:31:15 +0100 Subject: [PATCH] Update 23/6/2012 --- .gitattributes | 22 ++ NAMESPACE | 24 +- NEWS | 84 ++--- R/dendrogram.R | 542 +++++++++++++++---------------- R/ggdendro-package.R | 80 ++--- R/ggdendrogram.R | 178 +++++----- R/rpart.R | 420 ++++++++++++------------ R/tree.r | 298 ++++++++--------- man/dendro_data.dendrogram.Rd | 134 ++++---- man/dendro_data.rpart.Rd | 96 +++--- man/dendro_data.tree.Rd | 110 +++---- man/dendrogram_data.Rd | 70 ++-- man/get_data_tree_leaf_labels.Rd | 48 +-- man/ggdendrogram.Rd | 96 +++--- man/rpart_labels.Rd | 72 ++-- man/rpart_segments.Rd | 48 +-- man/theme_dendro.Rd | 26 +- man/tree_labels.Rd | 56 ++-- man/tree_segments.Rd | 50 +-- vignettes/ggdendro.Rnw | 424 ++++++++++++------------ 20 files changed, 1450 insertions(+), 1428 deletions(-) create mode 100644 .gitattributes diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..412eeda --- /dev/null +++ b/.gitattributes @@ -0,0 +1,22 @@ +# Auto detect text files and perform LF normalization +* text=auto + +# Custom for Visual Studio +*.cs diff=csharp +*.sln merge=union +*.csproj merge=union +*.vbproj merge=union +*.fsproj merge=union +*.dbproj merge=union + +# Standard to msysgit +*.doc diff=astextplain +*.DOC diff=astextplain +*.docx diff=astextplain +*.DOCX diff=astextplain +*.dot diff=astextplain +*.DOT diff=astextplain +*.pdf diff=astextplain +*.PDF diff=astextplain +*.rtf diff=astextplain +*.RTF diff=astextplain diff --git a/NAMESPACE b/NAMESPACE index 1a23934..4a74876 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,12 +1,12 @@ -export(as.dendro) -export(dendro_data) -export(dendro_data.default) -export(dendro_data.dendrogram) -export(dendro_data.hclust) -export(ggdendrogram) -export(label) -export(leaf_label) -export(segment) -export(theme_dendro) -S3method(dendro_data,rpart) -S3method(dendro_data,tree) +export(as.dendro) +export(dendro_data) +export(dendro_data.default) +export(dendro_data.dendrogram) +export(dendro_data.hclust) +export(ggdendrogram) +export(label) +export(leaf_label) +export(segment) +export(theme_dendro) +S3method(dendro_data,rpart) +S3method(dendro_data,tree) diff --git a/NEWS b/NEWS index b01ef8b..2d70c71 100644 --- a/NEWS +++ b/NEWS @@ -1,43 +1,43 @@ -ggdendro 0.1-04 (Release date: 02/02/2012) -============== - -New functionality - -* None -* Modified examples to conform to ggplot2 v0.9 - - -ggdendro 0.1-02 (Release date: 19/10/2011) -============== - -New functionality - -* Added support for classification trees using `package:rpart` - -Fixed bugs: - -* Classification trees using `tree` produced error due to attempted rounding on factor variables - -Changes in API - -* Fixed inconsistencies in the names of the `data.frame` segments. The names are now always `x`, `y`, `xend` and `yend` - - -ggdendro 0.0-7 (Release date: 12/8/2011) -============== - -New functionality - -* Included the `ggdendrogram` function that conveniently creates a `ggplot` dendrogram in a single line of code. -* Created `theme_dendro`, an almost blank theme - -ggdendro 0.0 -============ - -Experimental release of package. - -Supports extraction of plot data for the following classes: - -* dendrogram -* tree +ggdendro 0.1-04 (Release date: 02/02/2012) +============== + +New functionality + +* None +* Modified examples to conform to ggplot2 v0.9 + + +ggdendro 0.1-02 (Release date: 19/10/2011) +============== + +New functionality + +* Added support for classification trees using `package:rpart` + +Fixed bugs: + +* Classification trees using `tree` produced error due to attempted rounding on factor variables + +Changes in API + +* Fixed inconsistencies in the names of the `data.frame` segments. The names are now always `x`, `y`, `xend` and `yend` + + +ggdendro 0.0-7 (Release date: 12/8/2011) +============== + +New functionality + +* Included the `ggdendrogram` function that conveniently creates a `ggplot` dendrogram in a single line of code. +* Created `theme_dendro`, an almost blank theme + +ggdendro 0.0 +============ + +Experimental release of package. + +Supports extraction of plot data for the following classes: + +* dendrogram +* tree * hclust \ No newline at end of file diff --git a/R/dendrogram.R b/R/dendrogram.R index 7ce4461..c130245 100644 --- a/R/dendrogram.R +++ b/R/dendrogram.R @@ -1,271 +1,271 @@ - -############################################################################### -#' Extract line segment and label data from dendrogram or hclust object. -#' -#' Extract line segment and label data from dendrogram or hclust object. Results are stored in a -#' list of data frames containing line segment data and label data. -#' -#' @param model object of class "dendrogram", e.g. the output of as.dendrogram() -#' @param type The type of plot, indicating the shape of the dendrogram. "rectangle" will draw -#' rectangular lines, while "triangle" will draw triangular lines. -#' @param ... ignored -#' @aliases dendro_data.dendrogram dendro_data.hclust -#' @method dendro_data dendrogram -#' @method dendro_data hclust -#' @export dendro_data.dendrogram dendro_data.hclust -#' @return -#' A list with the following elements: -#' \item{segments}{Line segment data} -#' \item{labels}{Label data} -#' @seealso \code{\link{ggdendrogram}} -#' @family dendro_data methods -#' @family dendrogram/hclust functions -#' @examples -#' require(ggplot2) -#' # -#' # Demonstrate dendro_data.dendrogram -#' # -#' hc <- hclust(dist(USArrests), "ave") -#' dhc <- as.dendrogram(hc) -#' # Rectangular lines -#' ddata <- dendro_data(dhc, type="rectangle") -#' ggplot(segment(ddata)) + geom_segment(aes(x=x, y=y, xend=xend, yend=yend)) + -#' coord_flip() + scale_y_reverse(expand=c(0.2, 0)) + theme_dendro() -#' # Triangular lines -#' ddata <- dendro_data(dhc, type="triangle") -#' ggplot(segment(ddata)) + geom_segment(aes(x=x, y=y, xend=xend, yend=yend)) + theme_dendro() -#' # -#' # Demonstrate dendro_data.hclust -#' # -#' require(ggplot2) -#' hc <- hclust(dist(USArrests), "ave") -#' # Rectangular lines -#' hcdata <- dendro_data(hc, type="rectangle") -#' ggplot(segment(hcdata)) + geom_segment(aes(x=x, y=y, xend=xend, yend=yend)) + -#' coord_flip() + scale_y_reverse(expand=c(0.2, 0)) + theme_dendro() -#' # Triangular lines -#' hcdata <- dendro_data(hc, type="triangle") -#' ggplot(segment(hcdata)) + geom_segment(aes(x=x, y=y, xend=xend, yend=yend)) + -#' theme_dendro() -dendro_data.dendrogram <- function (model, type = c("rectangle", "triangle"), ...){ - hcdata <- dendrogram_data(model, type=type, ...) - as.dendro( - segments = hcdata$segments, - labels = hcdata$labels, - class="dendrogram" - ) -} - -dendro_data.hclust <- function (model, type = c("rectangle", "triangle"), ...){ - dhc <- as.dendrogram(model) - hcdata <- dendrogram_data(dhc, type=type, ...) - as.dendro( - segments = hcdata$segments, - labels = hcdata$labels, - class="hclust" - ) -} - - - - - - -#' Extract data frame from dendrogram object for plotting using ggplot. -#' -#' Extract data frame from dendrogram object for plotting using ggplot -#' -#' @param x object of class "dendrogram", e.g. the output of as.dendrogram() -#' @param type The type of plot, indicating the shape of the dendrogram. "Rectangle" will draw -#' rectangular lines, while "triangle" will draw triangular lines. -#' @param ... ignored -#' @seealso \code{\link{ggdendrogram}} -#' @family dendro_data methods -#' @family dendrogram/hclust functions -#' @keywords internal -dendrogram_data <- function (x, type = c("rectangle", "triangle"), ...){ - - # Initialise variables that used to be in parameter list - leaflab <- "perpendicular" - center <- FALSE - xlab <- "" - ylab <- "" - horiz <- FALSE - #frame.plot <- FALSE - xaxt <- "n" - yaxt <- "s" - nodePar <- NULL - edgePar <- list() - dLeaf <- NULL - edge.root <- is.leaf(x) || !is.null(attr(x, "edgetext")) - - type <- match.arg(type) - #leaflab <- match.arg(leaflab) - hgt <- attr(x, "height") - if (edge.root && is.logical(edge.root)) - edge.root <- 0.0625 * if (is.leaf(x)) 1 else hgt - mem.x <- stats:::.memberDend(x) - yTop <- hgt + edge.root - if (center) { - x1 <- 0.5 - x2 <- mem.x + 0.5 - } - else { - x1 <- 1 - x2 <- mem.x - } - xl. <- c(x1 - 1/2, x2 + 1/2) - yl. <- c(0, yTop) -# if (missing(xlim) || is.null(xlim)) xlim <- xl. -# if (missing(ylim) || is.null(ylim)) ylim <- yl. -# plot(0, xlim = xlim, ylim = ylim, type = "n", xlab = xlab, -# ylab = ylab, xaxt = xaxt, yaxt = yaxt, frame.plot = frame.plot, -# ...) -# if (is.null(dLeaf)) -# dLeaf <- 0.75 * (if (horiz) -# strwidth("w") -# else strheight("x")) - if (edge.root) { -# x0 <- stats:::plotNodeLimit(x1, x2, x, center)$x -# (x0, hgt, x0, yTop) - if (!is.null(et <- attr(x, "edgetext"))) { - my <- mean(hgt, yTop) -# text(x0, my, et) - } - } - - gg.plotNode <- function (x1, x2, subtree, type, center, leaflab, dLeaf, nodePar, - edgePar, horiz=FALSE, ddsegments=NULL, ddlabels=NULL) { - inner <- !is.leaf(subtree) && x1 != x2 - yTop <- attr(subtree, "height") - bx <- stats:::plotNodeLimit(x1, x2, subtree, center) - xTop <- bx$x - hasP <- !is.null(nPar <- attr(subtree, "nodePar")) - if (!hasP) nPar <- nodePar - Xtract <- function(nam, L, default, indx) rep(if (nam %in% - names(L)) L[[nam]] else default, length.out = indx)[indx] - asTxt <- function(x){ - if (is.character(x) || is.expression(x)) - x else - if (is.null(x)) "" else as.character(x) - } - i <- if (inner || hasP) - 1 - else 2 - if (!is.null(nPar)) { - pch <- Xtract("pch", nPar, default = 1L:2, i) - cex <- Xtract("cex", nPar, default = c(1, 1), i) - col <- Xtract("col", nPar, default = par("col"), i) - bg <- Xtract("bg", nPar, default = par("bg"), i) - points(if (horiz) - cbind(yTop, xTop) - else cbind(xTop, yTop), pch = pch, bg = bg, col = col, cex = cex) - } - lab.col <- Xtract("lab.col", nPar, default = par("col"), i) - lab.cex <- Xtract("lab.cex", nPar, default = c(1, 1), i) - lab.font <- Xtract("lab.font", nPar, default = par("font"), i) - if (is.leaf(subtree)) { - if (leaflab == "perpendicular") { - Y <- yTop - dLeaf * lab.cex - X <- xTop - srt <- 90 - adj <- 1 - nodeText <- asTxt(attr(subtree, "label")) - # ************************* -# text(X, Y, nodeText, xpd = TRUE, srt = srt, adj = adj, -# cex = lab.cex, col = lab.col, font = lab.font) - ddlabels <- rbind(ddlabels, data.frame(x=X, y=0, text=nodeText)) - } - } - else if (inner) { - segmentsHV <- function(x0, y0, x1, y1) { - # ************************* -# segments(x0, y0, x1, y1, col = col, lty = lty, lwd = lwd) -# ddsegments <- rbind(ddsegments, data.frame(x0, y0, x1, y1)) #AdV - data.frame(x0, y0, x1, y1) #AdV - } - for (k in seq_along(subtree)) { - child <- subtree[[k]] - yBot <- attr(child, "height") - if (getOption("verbose")) - cat("ch.", k, "@ h=", yBot, "; ") - if (is.null(yBot)) - yBot <- 0 - xBot <- if (center) - mean(bx$limit[k:(k + 1)]) - else bx$limit[k] + stats:::.midDend(child) -# hasE <- !is.null(ePar <- attr(child, "edgePar")) -# if (!hasE) ePar <- edgePar -# i <- if (!is.leaf(child) || hasE) 1 else 2 -# col <- Xtract("col", ePar, default = par("col"), i) -# lty <- Xtract("lty", ePar, default = par("lty"), i) -# lwd <- Xtract("lwd", ePar, default = par("lwd"), i) - if (type == "triangle") { - # ************************* - ddsegments <- rbind(ddsegments, segmentsHV(xTop, yTop, xBot, yBot)) - } - else { - # ************************* - ddsegments <- rbind(ddsegments, segmentsHV(xTop, yTop, xBot, yTop)) - ddsegments <- rbind(ddsegments, segmentsHV(xBot, yTop, xBot, yBot)) - } - vln <- NULL -# if (is.leaf(child) && leaflab == "textlike") { -# nodeText <- asTxt(attr(child, "label")) -# hln <- 0.6 * strwidth(nodeText, cex = lab.cex)/2 -# vln <- 1.5 * strheight(nodeText, cex = lab.cex)/2 - # ************************* -# rect(xBot - hln, yBot, xBot + hln, yBot + 2 * -# vln, col = p.col) - # ************************* -# text(xBot, yBot + vln, nodeText, xpd = TRUE, -# cex = lab.cex, col = lab.col, font = lab.font) -# } - if (!is.null(attr(child, "edgetext"))) { - edgeText <- asTxt(attr(child, "edgetext")) - if (!is.null(vln)) { - mx <- if (type == "triangle") - (xTop + xBot + ((xTop - xBot)/(yTop - yBot)) * vln)/2 - else xBot - my <- (yTop + yBot + 2 * vln)/2 - } - else { - mx <- if (type == "triangle") - (xTop + xBot)/2 - else xBot - my <- (yTop + yBot)/2 - } -# p.col <- Xtract("p.col", ePar, default = "white", i) -# p.border <- Xtract("p.border", ePar, default = par("fg"), i) -# p.lwd <- Xtract("p.lwd", ePar, default = lwd, i) -# p.lty <- Xtract("p.lty", ePar, default = lty, i) -# t.col <- Xtract("t.col", ePar, default = col, i) -# t.cex <- Xtract("t.cex", ePar, default = 1, i) -# t.font <- Xtract("t.font", ePar, default = par("font"), i) -# vlm <- strheight(c(edgeText, "h"), cex = t.cex)/2 -# hlm <- strwidth(c(edgeText, "m"), cex = t.cex)/2 -# hl3 <- c(hlm[1L], hlm[1L] + hlm[2L], hlm[1L]) - # ************************* -# polygon(mx + c(-hl3, hl3), my + sum(vlm) * -# c(-1L:1L, 1L:-1L), col = p.col, border = p.border, -# lty = p.lty, lwd = p.lwd) - # ************************* -# text(mx, my, edgeText, cex = t.cex, col = t.col, font = t.font) - } - plotNode_result <- gg.plotNode(bx$limit[k], bx$limit[k + 1], subtree = child, - type, center, leaflab, dLeaf, nodePar, edgePar, horiz, ddsegments, ddlabels) - ddsegments <- plotNode_result$segments - ddlabels <- plotNode_result$labels - } - } - return(list(segments=ddsegments, labels=ddlabels)) - } - - ret <- gg.plotNode(x1, x2, x, type = type, center = center, leaflab = leaflab, - dLeaf = dLeaf, nodePar = nodePar, edgePar = edgePar, horiz=FALSE, - ddsegments=NULL, ddlabels=NULL) - names(ret$segments) <- c("x", "y", "xend", "yend") - names(ret$labels) <- c("x", "y", "label") - ret -} - + +############################################################################### +#' Extract line segment and label data from dendrogram or hclust object. +#' +#' Extract line segment and label data from dendrogram or hclust object. Results are stored in a +#' list of data frames containing line segment data and label data. +#' +#' @param model object of class "dendrogram", e.g. the output of as.dendrogram() +#' @param type The type of plot, indicating the shape of the dendrogram. "rectangle" will draw +#' rectangular lines, while "triangle" will draw triangular lines. +#' @param ... ignored +#' @aliases dendro_data.dendrogram dendro_data.hclust +#' @method dendro_data dendrogram +#' @method dendro_data hclust +#' @export dendro_data.dendrogram dendro_data.hclust +#' @return +#' A list with the following elements: +#' \item{segments}{Line segment data} +#' \item{labels}{Label data} +#' @seealso \code{\link{ggdendrogram}} +#' @family dendro_data methods +#' @family dendrogram/hclust functions +#' @examples +#' require(ggplot2) +#' # +#' # Demonstrate dendro_data.dendrogram +#' # +#' hc <- hclust(dist(USArrests), "ave") +#' dhc <- as.dendrogram(hc) +#' # Rectangular lines +#' ddata <- dendro_data(dhc, type="rectangle") +#' ggplot(segment(ddata)) + geom_segment(aes(x=x, y=y, xend=xend, yend=yend)) + +#' coord_flip() + scale_y_reverse(expand=c(0.2, 0)) + theme_dendro() +#' # Triangular lines +#' ddata <- dendro_data(dhc, type="triangle") +#' ggplot(segment(ddata)) + geom_segment(aes(x=x, y=y, xend=xend, yend=yend)) + theme_dendro() +#' # +#' # Demonstrate dendro_data.hclust +#' # +#' require(ggplot2) +#' hc <- hclust(dist(USArrests), "ave") +#' # Rectangular lines +#' hcdata <- dendro_data(hc, type="rectangle") +#' ggplot(segment(hcdata)) + geom_segment(aes(x=x, y=y, xend=xend, yend=yend)) + +#' coord_flip() + scale_y_reverse(expand=c(0.2, 0)) + theme_dendro() +#' # Triangular lines +#' hcdata <- dendro_data(hc, type="triangle") +#' ggplot(segment(hcdata)) + geom_segment(aes(x=x, y=y, xend=xend, yend=yend)) + +#' theme_dendro() +dendro_data.dendrogram <- function (model, type = c("rectangle", "triangle"), ...){ + hcdata <- dendrogram_data(model, type=type, ...) + as.dendro( + segments = hcdata$segments, + labels = hcdata$labels, + class="dendrogram" + ) +} + +dendro_data.hclust <- function (model, type = c("rectangle", "triangle"), ...){ + dhc <- as.dendrogram(model) + hcdata <- dendrogram_data(dhc, type=type, ...) + as.dendro( + segments = hcdata$segments, + labels = hcdata$labels, + class="hclust" + ) +} + + + + + + +#' Extract data frame from dendrogram object for plotting using ggplot. +#' +#' Extract data frame from dendrogram object for plotting using ggplot +#' +#' @param x object of class "dendrogram", e.g. the output of as.dendrogram() +#' @param type The type of plot, indicating the shape of the dendrogram. "Rectangle" will draw +#' rectangular lines, while "triangle" will draw triangular lines. +#' @param ... ignored +#' @seealso \code{\link{ggdendrogram}} +#' @family dendro_data methods +#' @family dendrogram/hclust functions +#' @keywords internal +dendrogram_data <- function (x, type = c("rectangle", "triangle"), ...){ + + # Initialise variables that used to be in parameter list + leaflab <- "perpendicular" + center <- FALSE + xlab <- "" + ylab <- "" + horiz <- FALSE + #frame.plot <- FALSE + xaxt <- "n" + yaxt <- "s" + nodePar <- NULL + edgePar <- list() + dLeaf <- NULL + edge.root <- is.leaf(x) || !is.null(attr(x, "edgetext")) + + type <- match.arg(type) + #leaflab <- match.arg(leaflab) + hgt <- attr(x, "height") + if (edge.root && is.logical(edge.root)) + edge.root <- 0.0625 * if (is.leaf(x)) 1 else hgt + mem.x <- stats:::.memberDend(x) + yTop <- hgt + edge.root + if (center) { + x1 <- 0.5 + x2 <- mem.x + 0.5 + } + else { + x1 <- 1 + x2 <- mem.x + } + xl. <- c(x1 - 1/2, x2 + 1/2) + yl. <- c(0, yTop) +# if (missing(xlim) || is.null(xlim)) xlim <- xl. +# if (missing(ylim) || is.null(ylim)) ylim <- yl. +# plot(0, xlim = xlim, ylim = ylim, type = "n", xlab = xlab, +# ylab = ylab, xaxt = xaxt, yaxt = yaxt, frame.plot = frame.plot, +# ...) +# if (is.null(dLeaf)) +# dLeaf <- 0.75 * (if (horiz) +# strwidth("w") +# else strheight("x")) + if (edge.root) { +# x0 <- stats:::plotNodeLimit(x1, x2, x, center)$x +# (x0, hgt, x0, yTop) + if (!is.null(et <- attr(x, "edgetext"))) { + my <- mean(hgt, yTop) +# text(x0, my, et) + } + } + + gg.plotNode <- function (x1, x2, subtree, type, center, leaflab, dLeaf, nodePar, + edgePar, horiz=FALSE, ddsegments=NULL, ddlabels=NULL) { + inner <- !is.leaf(subtree) && x1 != x2 + yTop <- attr(subtree, "height") + bx <- stats:::plotNodeLimit(x1, x2, subtree, center) + xTop <- bx$x + hasP <- !is.null(nPar <- attr(subtree, "nodePar")) + if (!hasP) nPar <- nodePar + Xtract <- function(nam, L, default, indx) rep(if (nam %in% + names(L)) L[[nam]] else default, length.out = indx)[indx] + asTxt <- function(x){ + if (is.character(x) || is.expression(x)) + x else + if (is.null(x)) "" else as.character(x) + } + i <- if (inner || hasP) + 1 + else 2 + if (!is.null(nPar)) { + pch <- Xtract("pch", nPar, default = 1L:2, i) + cex <- Xtract("cex", nPar, default = c(1, 1), i) + col <- Xtract("col", nPar, default = par("col"), i) + bg <- Xtract("bg", nPar, default = par("bg"), i) + points(if (horiz) + cbind(yTop, xTop) + else cbind(xTop, yTop), pch = pch, bg = bg, col = col, cex = cex) + } + lab.col <- Xtract("lab.col", nPar, default = par("col"), i) + lab.cex <- Xtract("lab.cex", nPar, default = c(1, 1), i) + lab.font <- Xtract("lab.font", nPar, default = par("font"), i) + if (is.leaf(subtree)) { + if (leaflab == "perpendicular") { + Y <- yTop - dLeaf * lab.cex + X <- xTop + srt <- 90 + adj <- 1 + nodeText <- asTxt(attr(subtree, "label")) + # ************************* +# text(X, Y, nodeText, xpd = TRUE, srt = srt, adj = adj, +# cex = lab.cex, col = lab.col, font = lab.font) + ddlabels <- rbind(ddlabels, data.frame(x=X, y=0, text=nodeText)) + } + } + else if (inner) { + segmentsHV <- function(x0, y0, x1, y1) { + # ************************* +# segments(x0, y0, x1, y1, col = col, lty = lty, lwd = lwd) +# ddsegments <- rbind(ddsegments, data.frame(x0, y0, x1, y1)) #AdV + data.frame(x0, y0, x1, y1) #AdV + } + for (k in seq_along(subtree)) { + child <- subtree[[k]] + yBot <- attr(child, "height") + if (getOption("verbose")) + cat("ch.", k, "@ h=", yBot, "; ") + if (is.null(yBot)) + yBot <- 0 + xBot <- if (center) + mean(bx$limit[k:(k + 1)]) + else bx$limit[k] + stats:::.midDend(child) +# hasE <- !is.null(ePar <- attr(child, "edgePar")) +# if (!hasE) ePar <- edgePar +# i <- if (!is.leaf(child) || hasE) 1 else 2 +# col <- Xtract("col", ePar, default = par("col"), i) +# lty <- Xtract("lty", ePar, default = par("lty"), i) +# lwd <- Xtract("lwd", ePar, default = par("lwd"), i) + if (type == "triangle") { + # ************************* + ddsegments <- rbind(ddsegments, segmentsHV(xTop, yTop, xBot, yBot)) + } + else { + # ************************* + ddsegments <- rbind(ddsegments, segmentsHV(xTop, yTop, xBot, yTop)) + ddsegments <- rbind(ddsegments, segmentsHV(xBot, yTop, xBot, yBot)) + } + vln <- NULL +# if (is.leaf(child) && leaflab == "textlike") { +# nodeText <- asTxt(attr(child, "label")) +# hln <- 0.6 * strwidth(nodeText, cex = lab.cex)/2 +# vln <- 1.5 * strheight(nodeText, cex = lab.cex)/2 + # ************************* +# rect(xBot - hln, yBot, xBot + hln, yBot + 2 * +# vln, col = p.col) + # ************************* +# text(xBot, yBot + vln, nodeText, xpd = TRUE, +# cex = lab.cex, col = lab.col, font = lab.font) +# } + if (!is.null(attr(child, "edgetext"))) { + edgeText <- asTxt(attr(child, "edgetext")) + if (!is.null(vln)) { + mx <- if (type == "triangle") + (xTop + xBot + ((xTop - xBot)/(yTop - yBot)) * vln)/2 + else xBot + my <- (yTop + yBot + 2 * vln)/2 + } + else { + mx <- if (type == "triangle") + (xTop + xBot)/2 + else xBot + my <- (yTop + yBot)/2 + } +# p.col <- Xtract("p.col", ePar, default = "white", i) +# p.border <- Xtract("p.border", ePar, default = par("fg"), i) +# p.lwd <- Xtract("p.lwd", ePar, default = lwd, i) +# p.lty <- Xtract("p.lty", ePar, default = lty, i) +# t.col <- Xtract("t.col", ePar, default = col, i) +# t.cex <- Xtract("t.cex", ePar, default = 1, i) +# t.font <- Xtract("t.font", ePar, default = par("font"), i) +# vlm <- strheight(c(edgeText, "h"), cex = t.cex)/2 +# hlm <- strwidth(c(edgeText, "m"), cex = t.cex)/2 +# hl3 <- c(hlm[1L], hlm[1L] + hlm[2L], hlm[1L]) + # ************************* +# polygon(mx + c(-hl3, hl3), my + sum(vlm) * +# c(-1L:1L, 1L:-1L), col = p.col, border = p.border, +# lty = p.lty, lwd = p.lwd) + # ************************* +# text(mx, my, edgeText, cex = t.cex, col = t.col, font = t.font) + } + plotNode_result <- gg.plotNode(bx$limit[k], bx$limit[k + 1], subtree = child, + type, center, leaflab, dLeaf, nodePar, edgePar, horiz, ddsegments, ddlabels) + ddsegments <- plotNode_result$segments + ddlabels <- plotNode_result$labels + } + } + return(list(segments=ddsegments, labels=ddlabels)) + } + + ret <- gg.plotNode(x1, x2, x, type = type, center = center, leaflab = leaflab, + dLeaf = dLeaf, nodePar = nodePar, edgePar = edgePar, horiz=FALSE, + ddsegments=NULL, ddlabels=NULL) + names(ret$segments) <- c("x", "y", "xend", "yend") + names(ret$labels) <- c("x", "y", "label") + ret +} + diff --git a/R/ggdendro-package.R b/R/ggdendro-package.R index 50c7ce2..530e4c8 100644 --- a/R/ggdendro-package.R +++ b/R/ggdendro-package.R @@ -1,40 +1,40 @@ -# package documentation -# -# Author: Andrie -############################################################################### - - -#' Tools for extracting dendrogram and tree diagram plot data for use with ggplot. -#' -#' This is a set of tools for dendrograms and tree plots using \code{\link[ggplot2]{ggplot}} -#' -#' The ggplot philosophy is to clearly separate data from the presentation. Unfortunately the plot method for dendrograms (\code{\link{plot.dendrogram}}) plots directly to a plot device without exposing the data. The ggdendro package resolves this by making available functions that extract the dendrogram plot data. This data can be used with ggplot. -#' -#' \code{\link{dendro_data}} extracts data from several cluster algorithms. It is a generic function with specific implementations for: -#' -#' \itemize{ -#' \item hclust: \code{\link{dendro_data.hclust}} -#' \item dendrogram: \code{\link{dendro_data.dendrogram}} -#' \item regression trees: \code{\link{dendro_data.tree}} -#' \item classification trees: \code{\link{dendro_data.rpart}} -#' } -#' -#' These methods create an object of class \code{dendro}, consisting of a list of data.frames. To extract the relevant data frames from the list, there are three accessor functions: -#' -#' \itemize{ -#' \item{\code{\link{segment}}}{the line segment data} -#' \item{\code{\link{label}}}{the text for each end segment} -#' \item{\code{\link{leaf_label}}}{the leaf labels of a tree diagram} -#' } -#' -#' To plot a dendrogram, either construct a plot with \code{\link[ggplot2]{ggplot}} or use the function \code{\link{ggdendrogram}} -#' -#' @name ggdendro-package -#' @aliases ggdendro -#' @docType package -#' @title Tools for creating dendrograms, regresion tree and classification tree plots using ggplot in [R] -#' @author Andrie de Vries \email{andrie.de.vries@@pentalibra.com} -#' @keywords package -#' @seealso \code{\link{dendro_data}} -NULL - +# package documentation +# +# Author: Andrie +############################################################################### + + +#' Tools for extracting dendrogram and tree diagram plot data for use with ggplot. +#' +#' This is a set of tools for dendrograms and tree plots using \code{\link[ggplot2]{ggplot}} +#' +#' The ggplot philosophy is to clearly separate data from the presentation. Unfortunately the plot method for dendrograms (\code{\link{plot.dendrogram}}) plots directly to a plot device without exposing the data. The ggdendro package resolves this by making available functions that extract the dendrogram plot data. This data can be used with ggplot. +#' +#' \code{\link{dendro_data}} extracts data from several cluster algorithms. It is a generic function with specific implementations for: +#' +#' \itemize{ +#' \item hclust: \code{\link{dendro_data.hclust}} +#' \item dendrogram: \code{\link{dendro_data.dendrogram}} +#' \item regression trees: \code{\link{dendro_data.tree}} +#' \item classification trees: \code{\link{dendro_data.rpart}} +#' } +#' +#' These methods create an object of class \code{dendro}, consisting of a list of data.frames. To extract the relevant data frames from the list, there are three accessor functions: +#' +#' \itemize{ +#' \item{\code{\link{segment}}}{the line segment data} +#' \item{\code{\link{label}}}{the text for each end segment} +#' \item{\code{\link{leaf_label}}}{the leaf labels of a tree diagram} +#' } +#' +#' To plot a dendrogram, either construct a plot with \code{\link[ggplot2]{ggplot}} or use the function \code{\link{ggdendrogram}} +#' +#' @name ggdendro-package +#' @aliases ggdendro +#' @docType package +#' @title Tools for creating dendrograms, regresion tree and classification tree plots using ggplot in [R] +#' @author Andrie de Vries \email{andrie.de.vries@@pentalibra.com} +#' @keywords package +#' @seealso \code{\link{dendro_data}} +NULL + diff --git a/R/ggdendrogram.R b/R/ggdendrogram.R index 069a9d7..9607aa2 100644 --- a/R/ggdendrogram.R +++ b/R/ggdendrogram.R @@ -1,89 +1,89 @@ -# Add comment -# -# Author: Andrie -#---------------------------------------------------------------------------------- - - -#' Creates dendrogram plot using ggplot. -#' -#' Creates dendrogram plot using ggplot. -#' -#' @param data Either a dendro object or an object that can be coerced to class dendro using the \code{\link{dendro_data}} function, i.e. objects of class dendrogram, hclust or tree -#' @param segments If TRUE, show line segments -#' @param labels if TRUE, shows segment labels -#' @param leaf_labels if TRUE, shows leaf labels -#' @param rotate if TRUE, rotates plot by 90 degrees -#' @param theme_dendro if TRUE, applies a blank theme to plot (see \code{\link{theme_dendro}}) -#' @param ... other parameters passed to \code{\link[ggplot2]{geom_text}} -#' @export -#' @return A \code{\link[ggplot2]{ggplot}} object -#' @seealso \code{\link{dendro_data}} -#' @examples -#' library(ggplot2) -#' hc <- hclust(dist(USArrests), "ave") -#' ### demonstrate plotting directly from object class hclust -#' ggdendrogram(hc, rotate=FALSE) -#' ggdendrogram(hc, rotate=TRUE) -#' ### demonstrate converting hclust to dendro using dendro_data first -#' hcdata <- dendro_data(hc) -#' ggdendrogram(hcdata, rotate=TRUE, size=2) + opts(title="Dendrogram in ggplot2") -ggdendrogram <- function(data, segments=TRUE, labels=TRUE, leaf_labels=TRUE, - rotate=FALSE, theme_dendro=TRUE, ...){ - stopifnot(require(ggplot2)) - dataClass <- if(inherits(data, "dendro")) data$class else class(data) - angle <- if(dataClass %in% c("dendrogram", "hclust")){ - ifelse(rotate, 0, 90) - } else { - ifelse(rotate, 90, 0) - } - hjust <- if(dataClass %in% c("dendrogram", "hclust")){ - ifelse(rotate, 0, 1) - } else { - 0.5 - } - if(!is.dendro(data)) data <- dendro_data(data) - p <- ggplot() - if(all(segments, !is.null(data$segments))){ - p <- p + geom_segment(data=segment(data), - aes_string(x="x", y="y", xend="xend", yend="yend")) - } - if(all(labels, !is.null(data$labels))){ - p <- p + geom_text(data=label(data), - aes_string(x="x", y="y", label="label"), hjust=hjust, angle=angle, ...) - } - if(all(leaf_labels, !is.null(data$leaf_labels))){ - p <- p + geom_text(data=leaf_label(data), - aes_string(x="x", y="y", label="label"), hjust=hjust, angle=angle, ...) - } - if(rotate){ - p <- p + coord_flip() - p <- p + scale_y_reverse(expand=c(0.2, 0)) - } else { - p <- p + scale_y_continuous(expand=c(0.2, 0)) - } - if(theme_dendro) p <- p + theme_dendro() - p -} - - -#' Creates completely blank theme in ggplot. -#' -#' Sets most of the \code{ggplot} options to blank, by returning blank \code{opts} for the panel grid, panel background, axis title, axis text, axis line and axis ticks. -#' @export -theme_dendro <- function(){ - stopifnot(require(ggplot2)) - theme_blank <- ggplot2::theme_blank - ggplot2::opts( - panel.grid.major = theme_blank(), - panel.grid.minor = theme_blank(), - panel.background = theme_blank(), - axis.title.x = theme_text(colour=NA), - axis.title.y = theme_blank(), - axis.text.x = theme_blank(), - axis.text.y = theme_blank(), - axis.line = theme_blank(), - axis.ticks = theme_blank() - ) -} - - +# Add comment +# +# Author: Andrie +#---------------------------------------------------------------------------------- + + +#' Creates dendrogram plot using ggplot. +#' +#' Creates dendrogram plot using ggplot. +#' +#' @param data Either a dendro object or an object that can be coerced to class dendro using the \code{\link{dendro_data}} function, i.e. objects of class dendrogram, hclust or tree +#' @param segments If TRUE, show line segments +#' @param labels if TRUE, shows segment labels +#' @param leaf_labels if TRUE, shows leaf labels +#' @param rotate if TRUE, rotates plot by 90 degrees +#' @param theme_dendro if TRUE, applies a blank theme to plot (see \code{\link{theme_dendro}}) +#' @param ... other parameters passed to \code{\link[ggplot2]{geom_text}} +#' @export +#' @return A \code{\link[ggplot2]{ggplot}} object +#' @seealso \code{\link{dendro_data}} +#' @examples +#' library(ggplot2) +#' hc <- hclust(dist(USArrests), "ave") +#' ### demonstrate plotting directly from object class hclust +#' ggdendrogram(hc, rotate=FALSE) +#' ggdendrogram(hc, rotate=TRUE) +#' ### demonstrate converting hclust to dendro using dendro_data first +#' hcdata <- dendro_data(hc) +#' ggdendrogram(hcdata, rotate=TRUE, size=2) + opts(title="Dendrogram in ggplot2") +ggdendrogram <- function(data, segments=TRUE, labels=TRUE, leaf_labels=TRUE, + rotate=FALSE, theme_dendro=TRUE, ...){ + stopifnot(require(ggplot2)) + dataClass <- if(inherits(data, "dendro")) data$class else class(data) + angle <- if(dataClass %in% c("dendrogram", "hclust")){ + ifelse(rotate, 0, 90) + } else { + ifelse(rotate, 90, 0) + } + hjust <- if(dataClass %in% c("dendrogram", "hclust")){ + ifelse(rotate, 0, 1) + } else { + 0.5 + } + if(!is.dendro(data)) data <- dendro_data(data) + p <- ggplot() + if(all(segments, !is.null(data$segments))){ + p <- p + geom_segment(data=segment(data), + aes_string(x="x", y="y", xend="xend", yend="yend")) + } + if(all(labels, !is.null(data$labels))){ + p <- p + geom_text(data=label(data), + aes_string(x="x", y="y", label="label"), hjust=hjust, angle=angle, ...) + } + if(all(leaf_labels, !is.null(data$leaf_labels))){ + p <- p + geom_text(data=leaf_label(data), + aes_string(x="x", y="y", label="label"), hjust=hjust, angle=angle, ...) + } + if(rotate){ + p <- p + coord_flip() + p <- p + scale_y_reverse(expand=c(0.2, 0)) + } else { + p <- p + scale_y_continuous(expand=c(0.2, 0)) + } + if(theme_dendro) p <- p + theme_dendro() + p +} + + +#' Creates completely blank theme in ggplot. +#' +#' Sets most of the \code{ggplot} options to blank, by returning blank \code{opts} for the panel grid, panel background, axis title, axis text, axis line and axis ticks. +#' @export +theme_dendro <- function(){ + stopifnot(require(ggplot2)) + theme_blank <- ggplot2::theme_blank + ggplot2::opts( + panel.grid.major = theme_blank(), + panel.grid.minor = theme_blank(), + panel.background = theme_blank(), + axis.title.x = theme_text(colour=NA), + axis.title.y = theme_blank(), + axis.text.x = theme_blank(), + axis.text.y = theme_blank(), + axis.line = theme_blank(), + axis.ticks = theme_blank() + ) +} + + diff --git a/R/rpart.R b/R/rpart.R index e607fc2..d61e921 100644 --- a/R/rpart.R +++ b/R/rpart.R @@ -1,210 +1,210 @@ -# Classification trees using rpart -# -# Author: Andrie -#---------------------------------------------------------------------------------- - - - -# Classification Tree with rpart - - -#' Extract data from classification tree object for plotting using ggplot. -#' -#' Extracts data to plot line segments and labels from a \code{\link[rpart]{rpart}} classification tree object. This data can then be manipulated or plotted, e.g. using \code{\link[ggplot2]{ggplot}}. -#' -#' @param model object of class "tree", e.g. the output of tree() -#' @param ... ignored -#' @method dendro_data rpart -#' @export -#' @return -#' A list of three data frames: -#' \item{segments}{a data frame containing the line segment data} -#' \item{labels}{a data frame containing the label text data} -#' \item{leaf_labels}{a data frame containing the leaf label text data} -#' @seealso \code{\link{ggdendrogram}} -#' @family dendro_data methods -#' @family rpart functions -#' @examples -#' require(rpart) -#' require(ggplot2) -#' fit <- rpart(Kyphosis ~ Age + Number + Start, method="class", data=kyphosis) -#' fitr <- dendro_data(fit) -#' ggplot() + -#' geom_segment(data=fitr$segments, aes(x=x, y=y, xend=xend, yend=yend)) + -#' geom_text(data=fitr$labels, aes(x=x, y=y, label=label)) + -#' geom_text(data=fitr$leaf_labels, aes(x=x, y=y, label=label)) + -#' theme_dendro() -dendro_data.rpart <- function(model, ...){ - segments <- rpart_segments(model, ) - labels <- rpart_labels(model, ...) - as.dendro( - segments = segments, - labels = labels$labels, - leaf_labels = labels$leaf_labels, - class="rpart" - ) -} - -#' Extract data frame from rpart object for plotting using ggplot. -#' @param model object of class "tree", e.g. the output of tree() -#' @param ... ignored -#' @keywords internal -#' @seealso \code{\link{ggdendrogram}} -#' @family rpart functions -rpart_segments <- function (model, ...) { - x <- model - uniform = FALSE - branch = 1 - compress = FALSE - nspace <- -1L - margin = 0 - minbranch = 0.3 - require(rpart) - if (!inherits(x, "rpart")) - stop("Not an rpart object") - if (!is.null(x$frame$splits)) - x <- rpconvert(x) - if (nrow(x$frame) <= 1L) - stop("fit is not a tree, just a root") - if (compress & missing(nspace)) - nspace <- branch - if (!compress) - nspace <- -1L - if (dev.cur() == 1L) - dev.new() - assign(paste(".rpart.parms", dev.cur(), sep = "."), list(uniform = uniform, - branch = branch, nspace = nspace, minbranch = minbranch), - envir = .GlobalEnv) - temp <- rpart:::rpartco(x) - xx <- temp$x - yy <- temp$y - temp1 <- range(xx) + diff(range(xx)) * c(-margin, margin) - temp2 <- range(yy) + diff(range(yy)) * c(-margin, margin) - #plot(temp1, temp2, type = "n", axes = FALSE, xlab = "", ylab = "", ...) - node <- as.numeric(row.names(x$frame)) - temp <- rpart:::rpart.branch(xx, yy, node, branch) - #if (branch > 0) text(xx[1L], yy[1L], "|") - #lines(c(temp$x), c(temp$y)) - - - dat <- data.frame( - stack(as.data.frame(temp$x)), - stack(as.data.frame(temp$y)) - )[, c("ind", "values", "values.1")] - dat - - dat2 <- cbind(head(dat, -1), tail(dat, -1)) - dat3 <- dat2[complete.cases(dat2), -4] - names(dat3) <- c("n", "x", "y", "xend", "yend") - dat3 - -} - - -#' Extract labels data frame from rpart object for plotting using ggplot. -#' @param model object of class "rpart", e.g. the output of rpart() -#' @param ... ignored -#' @return a list with two elements: $labels and $leaf_labels -#' @keywords internal -#' @seealso \code{\link{ggdendrogram}} -#' @family dendro_data methods -#' @family rpart functions -rpart_labels <- function (model, splits = TRUE, label, FUN = text, all = FALSE, pretty = NULL, - digits = getOption("digits") - 3, use.n = FALSE, fancy = FALSE, - fwidth = 0.8, fheight = 0.8, ...) -{ - require(rpart) - x <- model - if (!inherits(x, "rpart")) - stop("Not legitimate rpart") - if (!is.null(x$frame$splits)) - x <- rpconvert(x) - if (nrow(x$frame) <= 1) - stop("fit is not a tree, just a root") - frame <- x$frame - if (!missing(label)) - warning("argument 'label' is currently unused") - cxy <- par("cxy") - if (!is.null(srt <- list(...)$srt) && srt == 90) - cxy <- rev(cxy) - xy <- rpart:::rpartco(x) - node <- as.numeric(row.names(x$frame)) - is.left <- (node%%2 == 0) - node.left <- node[is.left] - parent <- match(node.left/2, node) - if (splits) { - left.child <- match(2 * node, node) - right.child <- match(node * 2 + 1, node) - rows <- labels(x, pretty = pretty) - if (fancy) { -# xytmp <- rpart.branch(x = xy$x, y = xy$y, node = node) -# leftptx <- (xytmp$x[2L, ] + xytmp$x[1L, ])/2 -# leftpty <- (xytmp$y[2L, ] + xytmp$y[1L, ])/2 -# rightptx <- (xytmp$x[3L, ] + xytmp$x[4, ])/2 -# rightpty <- (xytmp$y[3L, ] + xytmp$y[4L, ])/2 -# FUN(leftptx, leftpty + 0.52 * cxy[2L], rows[left.child[!is.na(left.child)]], ...) -# FUN(rightptx, rightpty - 0.52 * cxy[2L], rows[right.child[!is.na(right.child)]], ...) - } - else { - #FUN(xy$x, xy$y + 0.5 * cxy[2L], rows[left.child], ...) - labelSplits <- data.frame( - x = xy$x, - y = xy$y, - label = rows[left.child]) - labelSplits <- labelSplits[complete.cases(labelSplits), ] - labelSplits$type <= "splits" - } - } - leaves <- if (all) - rep(TRUE, nrow(frame)) - else frame$var == "" - ylevels <- attr(x, "ylevels") - stat <- if (is.null(frame$yval2)) - x$functions$text(yval = frame$yval[leaves], dev = frame$dev[leaves], - wt = frame$wt[leaves], ylevel = ylevels, digits = digits, - n = frame$n[leaves], use.n = use.n) - else x$functions$text(yval = frame$yval2[leaves, ], dev = frame$dev[leaves], - wt = frame$wt[leaves], ylevel = ylevels, digits = digits, - n = frame$n[leaves], use.n = use.n) - oval <- function(middlex, middley, a, b) { - theta <- seq(0, 2 * pi, pi/30) - newx <- middlex + a * cos(theta) - newy <- middley + b * sin(theta) - polygon(newx, newy, border = TRUE, col = 0) - } - rectangle <- function(middlex, middley, a, b) { - newx <- middlex + c(a, a, -a, -a) - newy <- middley + c(b, -b, -b, b) - polygon(newx, newy, border = TRUE, col = 0) - } -# if (fancy) { -# maxlen <- max(string.bounding.box(stat)$columns) + 1L -# maxht <- max(string.bounding.box(stat)$rows) + 1L -# if (fwidth < 1) -# a.length <- fwidth * cxy[1L] * maxlen -# else a.length <- fwidth * cxy[1L] -# if (fheight < 1) -# b.length <- fheight * cxy[2L] * maxht -# else b.length <- fheight * cxy[2L] -# for (i in parent) oval(xy$x[i], xy$y[i], a = sqrt(2) * -# a.length/2, b = sqrt(2) * b.length/2) -# child <- match(node[frame$var == ""], node) -# for (i in child) rectangle(xy$x[i], xy$y[i], a = a.length/2, b = b.length/2) -# } - if (fancy) - FUN(xy$x[leaves], xy$y[leaves] + 0.5 * cxy[2], stat, ...) - else { - #FUN(xy$x[leaves], xy$y[leaves] - 0.5 * cxy[2], stat, adj = 0.5, ...) - labelLeaves <- data.frame( - x = xy$x[leaves], - y = xy$y[leaves], - label = stat - ) - } - - list( - labels=labelSplits, - leaf_labels=labelLeaves - ) -} - +# Classification trees using rpart +# +# Author: Andrie +#---------------------------------------------------------------------------------- + + + +# Classification Tree with rpart + + +#' Extract data from classification tree object for plotting using ggplot. +#' +#' Extracts data to plot line segments and labels from a \code{\link[rpart]{rpart}} classification tree object. This data can then be manipulated or plotted, e.g. using \code{\link[ggplot2]{ggplot}}. +#' +#' @param model object of class "tree", e.g. the output of tree() +#' @param ... ignored +#' @method dendro_data rpart +#' @export +#' @return +#' A list of three data frames: +#' \item{segments}{a data frame containing the line segment data} +#' \item{labels}{a data frame containing the label text data} +#' \item{leaf_labels}{a data frame containing the leaf label text data} +#' @seealso \code{\link{ggdendrogram}} +#' @family dendro_data methods +#' @family rpart functions +#' @examples +#' require(rpart) +#' require(ggplot2) +#' fit <- rpart(Kyphosis ~ Age + Number + Start, method="class", data=kyphosis) +#' fitr <- dendro_data(fit) +#' ggplot() + +#' geom_segment(data=fitr$segments, aes(x=x, y=y, xend=xend, yend=yend)) + +#' geom_text(data=fitr$labels, aes(x=x, y=y, label=label)) + +#' geom_text(data=fitr$leaf_labels, aes(x=x, y=y, label=label)) + +#' theme_dendro() +dendro_data.rpart <- function(model, ...){ + segments <- rpart_segments(model, ) + labels <- rpart_labels(model, ...) + as.dendro( + segments = segments, + labels = labels$labels, + leaf_labels = labels$leaf_labels, + class="rpart" + ) +} + +#' Extract data frame from rpart object for plotting using ggplot. +#' @param model object of class "tree", e.g. the output of tree() +#' @param ... ignored +#' @keywords internal +#' @seealso \code{\link{ggdendrogram}} +#' @family rpart functions +rpart_segments <- function (model, ...) { + x <- model + uniform = FALSE + branch = 1 + compress = FALSE + nspace <- -1L + margin = 0 + minbranch = 0.3 + require(rpart) + if (!inherits(x, "rpart")) + stop("Not an rpart object") + if (!is.null(x$frame$splits)) + x <- rpconvert(x) + if (nrow(x$frame) <= 1L) + stop("fit is not a tree, just a root") + if (compress & missing(nspace)) + nspace <- branch + if (!compress) + nspace <- -1L + if (dev.cur() == 1L) + dev.new() + assign(paste(".rpart.parms", dev.cur(), sep = "."), list(uniform = uniform, + branch = branch, nspace = nspace, minbranch = minbranch), + envir = .GlobalEnv) + temp <- rpart:::rpartco(x) + xx <- temp$x + yy <- temp$y + temp1 <- range(xx) + diff(range(xx)) * c(-margin, margin) + temp2 <- range(yy) + diff(range(yy)) * c(-margin, margin) + #plot(temp1, temp2, type = "n", axes = FALSE, xlab = "", ylab = "", ...) + node <- as.numeric(row.names(x$frame)) + temp <- rpart:::rpart.branch(xx, yy, node, branch) + #if (branch > 0) text(xx[1L], yy[1L], "|") + #lines(c(temp$x), c(temp$y)) + + + dat <- data.frame( + stack(as.data.frame(temp$x)), + stack(as.data.frame(temp$y)) + )[, c("ind", "values", "values.1")] + dat + + dat2 <- cbind(head(dat, -1), tail(dat, -1)) + dat3 <- dat2[complete.cases(dat2), -4] + names(dat3) <- c("n", "x", "y", "xend", "yend") + dat3 + +} + + +#' Extract labels data frame from rpart object for plotting using ggplot. +#' @param model object of class "rpart", e.g. the output of rpart() +#' @param ... ignored +#' @return a list with two elements: $labels and $leaf_labels +#' @keywords internal +#' @seealso \code{\link{ggdendrogram}} +#' @family dendro_data methods +#' @family rpart functions +rpart_labels <- function (model, splits = TRUE, label, FUN = text, all = FALSE, pretty = NULL, + digits = getOption("digits") - 3, use.n = FALSE, fancy = FALSE, + fwidth = 0.8, fheight = 0.8, ...) +{ + require(rpart) + x <- model + if (!inherits(x, "rpart")) + stop("Not legitimate rpart") + if (!is.null(x$frame$splits)) + x <- rpconvert(x) + if (nrow(x$frame) <= 1) + stop("fit is not a tree, just a root") + frame <- x$frame + if (!missing(label)) + warning("argument 'label' is currently unused") + cxy <- par("cxy") + if (!is.null(srt <- list(...)$srt) && srt == 90) + cxy <- rev(cxy) + xy <- rpart:::rpartco(x) + node <- as.numeric(row.names(x$frame)) + is.left <- (node%%2 == 0) + node.left <- node[is.left] + parent <- match(node.left/2, node) + if (splits) { + left.child <- match(2 * node, node) + right.child <- match(node * 2 + 1, node) + rows <- labels(x, pretty = pretty) + if (fancy) { +# xytmp <- rpart.branch(x = xy$x, y = xy$y, node = node) +# leftptx <- (xytmp$x[2L, ] + xytmp$x[1L, ])/2 +# leftpty <- (xytmp$y[2L, ] + xytmp$y[1L, ])/2 +# rightptx <- (xytmp$x[3L, ] + xytmp$x[4, ])/2 +# rightpty <- (xytmp$y[3L, ] + xytmp$y[4L, ])/2 +# FUN(leftptx, leftpty + 0.52 * cxy[2L], rows[left.child[!is.na(left.child)]], ...) +# FUN(rightptx, rightpty - 0.52 * cxy[2L], rows[right.child[!is.na(right.child)]], ...) + } + else { + #FUN(xy$x, xy$y + 0.5 * cxy[2L], rows[left.child], ...) + labelSplits <- data.frame( + x = xy$x, + y = xy$y, + label = rows[left.child]) + labelSplits <- labelSplits[complete.cases(labelSplits), ] + labelSplits$type <= "splits" + } + } + leaves <- if (all) + rep(TRUE, nrow(frame)) + else frame$var == "" + ylevels <- attr(x, "ylevels") + stat <- if (is.null(frame$yval2)) + x$functions$text(yval = frame$yval[leaves], dev = frame$dev[leaves], + wt = frame$wt[leaves], ylevel = ylevels, digits = digits, + n = frame$n[leaves], use.n = use.n) + else x$functions$text(yval = frame$yval2[leaves, ], dev = frame$dev[leaves], + wt = frame$wt[leaves], ylevel = ylevels, digits = digits, + n = frame$n[leaves], use.n = use.n) + oval <- function(middlex, middley, a, b) { + theta <- seq(0, 2 * pi, pi/30) + newx <- middlex + a * cos(theta) + newy <- middley + b * sin(theta) + polygon(newx, newy, border = TRUE, col = 0) + } + rectangle <- function(middlex, middley, a, b) { + newx <- middlex + c(a, a, -a, -a) + newy <- middley + c(b, -b, -b, b) + polygon(newx, newy, border = TRUE, col = 0) + } +# if (fancy) { +# maxlen <- max(string.bounding.box(stat)$columns) + 1L +# maxht <- max(string.bounding.box(stat)$rows) + 1L +# if (fwidth < 1) +# a.length <- fwidth * cxy[1L] * maxlen +# else a.length <- fwidth * cxy[1L] +# if (fheight < 1) +# b.length <- fheight * cxy[2L] * maxht +# else b.length <- fheight * cxy[2L] +# for (i in parent) oval(xy$x[i], xy$y[i], a = sqrt(2) * +# a.length/2, b = sqrt(2) * b.length/2) +# child <- match(node[frame$var == ""], node) +# for (i in child) rectangle(xy$x[i], xy$y[i], a = a.length/2, b = b.length/2) +# } + if (fancy) + FUN(xy$x[leaves], xy$y[leaves] + 0.5 * cxy[2], stat, ...) + else { + #FUN(xy$x[leaves], xy$y[leaves] - 0.5 * cxy[2], stat, adj = 0.5, ...) + labelLeaves <- data.frame( + x = xy$x[leaves], + y = xy$y[leaves], + label = stat + ) + } + + list( + labels=labelSplits, + leaf_labels=labelLeaves + ) +} + diff --git a/R/tree.r b/R/tree.r index 7b2d7e6..937b201 100644 --- a/R/tree.r +++ b/R/tree.r @@ -1,149 +1,149 @@ -# Plots tree object in ggplot2 - -#' Extract data from regression tree object for plotting using ggplot. -#' -#' Extracts data to plot line segments and labels from a \code{\link[tree]{tree}} object. This data can then be manipulated or plotted, e.g. using \code{\link[ggplot2]{ggplot}}. -#' -#' @param model object of class "tree", e.g. the output of tree() -#' @param ... ignored -#' @method dendro_data tree -#' @export -#' @return -#' A list of three data frames: -#' \item{segments}{a data frame containing the line segment data} -#' \item{labels}{a data frame containing the label text data} -#' \item{leaf_labels}{a data frame containing the leaf label text data} -#' @seealso \code{\link{ggdendrogram}} -#' @family dendro_data methods -#' @family tree functions -#' @examples -#' require(tree) -#' require(ggplot2) -#' require(MASS) -#' data(cpus, package="MASS") -#' cpus.ltr <- tree(log10(perf) ~ syct+mmin+mmax+cach+chmin+chmax, cpus) -#' tree_data <- dendro_data(cpus.ltr) -#' ggplot(segment(tree_data)) + -#' geom_segment(aes(x=x, y=y, xend=xend, yend=yend, size=n), -#' colour="blue", alpha=0.5) + -#' scale_size("n") + -#' geom_text(data=label(tree_data), -#' aes(x=x, y=y, label=label), vjust=-0.5, size=4) + -#' geom_text(data=leaf_label(tree_data), -#' aes(x=x, y=y, label=label), vjust=0.5, size=3) + -#' theme_dendro() -dendro_data.tree <- function(model, ...){ - require(tree) - labels <- tree_labels(model, ...) - as.dendro( - segments = tree_segments(model, ...), - labels = labels$labels, - leaf_labels = labels$leaf_labels, - class="tree" - ) -} - - - -#' Extract data frame from tree object for plotting using ggplot. -#' @param model object of class "tree", e.g. the output of tree() -#' @param ... ignored -#' @keywords internal -#' @seealso \code{\link{ggdendrogram}} -#' @family tree functions -tree_segments <- function(model, ...){ - require(tree) - # Uses tree:::treeco to extract data frame of plot locations - xy <- tree:::treeco(model) - n <- model$frame$n - - # Lines copied from tree:::treepl - x <- xy$x - y <- xy$y - node = as.numeric(row.names(model$frame)) - parent <- match((node%/%2), node) - sibling <- match(ifelse(node%%2, node - 1L, node + 1L), node) - - linev <- data.frame(x=x, y=y, xend=x, yend=y[parent], n=n) - lineh <- data.frame(x=x[parent], y=y[parent], xend=x, yend=y[parent], n=n) - - rbind(linev[-1,], lineh[-1,]) -} - -#' Extract labels data frame from tree object for plotting using ggplot. -#' @param model object of class "tree", e.g. the output of tree() -#' @param ... ignored -#' @return a list with two elements: $labels and $leaf_labels -#' @keywords internal -#' @seealso \code{\link{ggdendrogram}} -#' @family tree functions -tree_labels <- function(model, ...){ - require(tree) - # Uses tree:::treeco to extract data frame of plot locations - xy <- tree:::treeco(model) - label <- model$frame$var - yval <- model$frame$yval - sleft <- model$frame$splits.cutleft - sright <- model$frame$splits.right - - # Lines copied from tree:::treepl - x <- xy$x - y <- xy$y - node = as.numeric(row.names(model$frame)) - parent <- match((node%/%2), node) - sibling <- match(ifelse(node%%2, node - 1L, node + 1L), node) - - # Extract labels - data <- data.frame(x=x, y=y, label=label) - data <- data[data$label != "",] - labels <- as.data.frame(data) - - # Extract leaf labels - data <- data.frame(x, y, label, yval) - data <- data[data$label == "",] - if(is.numeric(data$yval)){ - data$label <- round(data$yval, 2) - } else { - data$label <- data$yval - } - leaf_labels <- as.data.frame(data) - - list( - labels = labels, - leaf_labels = leaf_labels - ) -} - -#' Extract labels data frame from tree object for plotting using ggplot. -#' -#' Extract labels data frame from tree object for plotting using ggplot -#' -#' @param model object of class "tree", e.g. the output of tree() -#' @param ... ignored -#' @keywords internal -#' @seealso \code{\link{ggdendrogram}} -#' @family tree functions -get_data_tree_leaf_labels <- function(model, ...){ - require(tree) - # Uses tree:::treeco to extract data frame of plot locations - xy <- tree:::treeco(model) - label <- model$frame$var - yval <- model$frame$yval - sleft <- model$frame$splits.cutleft - sright <- model$frame$splits.right - - # Lines copied from tree:::treepl - x <- xy$x - y <- xy$y - node = as.numeric(row.names(model$frame)) - parent <- match((node%/%2), node) - sibling <- match(ifelse(node%%2, node - 1L, node + 1L), node) - - data <- data.frame(x, y, label, yval) - data <- data[data$label == "",] - data$label <- round(data$yval, 2) - data -} - - - +# Plots tree object in ggplot2 + +#' Extract data from regression tree object for plotting using ggplot. +#' +#' Extracts data to plot line segments and labels from a \code{\link[tree]{tree}} object. This data can then be manipulated or plotted, e.g. using \code{\link[ggplot2]{ggplot}}. +#' +#' @param model object of class "tree", e.g. the output of tree() +#' @param ... ignored +#' @method dendro_data tree +#' @export +#' @return +#' A list of three data frames: +#' \item{segments}{a data frame containing the line segment data} +#' \item{labels}{a data frame containing the label text data} +#' \item{leaf_labels}{a data frame containing the leaf label text data} +#' @seealso \code{\link{ggdendrogram}} +#' @family dendro_data methods +#' @family tree functions +#' @examples +#' require(tree) +#' require(ggplot2) +#' require(MASS) +#' data(cpus, package="MASS") +#' cpus.ltr <- tree(log10(perf) ~ syct+mmin+mmax+cach+chmin+chmax, cpus) +#' tree_data <- dendro_data(cpus.ltr) +#' ggplot(segment(tree_data)) + +#' geom_segment(aes(x=x, y=y, xend=xend, yend=yend, size=n), +#' colour="blue", alpha=0.5) + +#' scale_size("n") + +#' geom_text(data=label(tree_data), +#' aes(x=x, y=y, label=label), vjust=-0.5, size=4) + +#' geom_text(data=leaf_label(tree_data), +#' aes(x=x, y=y, label=label), vjust=0.5, size=3) + +#' theme_dendro() +dendro_data.tree <- function(model, ...){ + require(tree) + labels <- tree_labels(model, ...) + as.dendro( + segments = tree_segments(model, ...), + labels = labels$labels, + leaf_labels = labels$leaf_labels, + class="tree" + ) +} + + + +#' Extract data frame from tree object for plotting using ggplot. +#' @param model object of class "tree", e.g. the output of tree() +#' @param ... ignored +#' @keywords internal +#' @seealso \code{\link{ggdendrogram}} +#' @family tree functions +tree_segments <- function(model, ...){ + require(tree) + # Uses tree:::treeco to extract data frame of plot locations + xy <- tree:::treeco(model) + n <- model$frame$n + + # Lines copied from tree:::treepl + x <- xy$x + y <- xy$y + node = as.numeric(row.names(model$frame)) + parent <- match((node%/%2), node) + sibling <- match(ifelse(node%%2, node - 1L, node + 1L), node) + + linev <- data.frame(x=x, y=y, xend=x, yend=y[parent], n=n) + lineh <- data.frame(x=x[parent], y=y[parent], xend=x, yend=y[parent], n=n) + + rbind(linev[-1,], lineh[-1,]) +} + +#' Extract labels data frame from tree object for plotting using ggplot. +#' @param model object of class "tree", e.g. the output of tree() +#' @param ... ignored +#' @return a list with two elements: $labels and $leaf_labels +#' @keywords internal +#' @seealso \code{\link{ggdendrogram}} +#' @family tree functions +tree_labels <- function(model, ...){ + require(tree) + # Uses tree:::treeco to extract data frame of plot locations + xy <- tree:::treeco(model) + label <- model$frame$var + yval <- model$frame$yval + sleft <- model$frame$splits.cutleft + sright <- model$frame$splits.right + + # Lines copied from tree:::treepl + x <- xy$x + y <- xy$y + node = as.numeric(row.names(model$frame)) + parent <- match((node%/%2), node) + sibling <- match(ifelse(node%%2, node - 1L, node + 1L), node) + + # Extract labels + data <- data.frame(x=x, y=y, label=label) + data <- data[data$label != "",] + labels <- as.data.frame(data) + + # Extract leaf labels + data <- data.frame(x, y, label, yval) + data <- data[data$label == "",] + if(is.numeric(data$yval)){ + data$label <- round(data$yval, 2) + } else { + data$label <- data$yval + } + leaf_labels <- as.data.frame(data) + + list( + labels = labels, + leaf_labels = leaf_labels + ) +} + +#' Extract labels data frame from tree object for plotting using ggplot. +#' +#' Extract labels data frame from tree object for plotting using ggplot +#' +#' @param model object of class "tree", e.g. the output of tree() +#' @param ... ignored +#' @keywords internal +#' @seealso \code{\link{ggdendrogram}} +#' @family tree functions +get_data_tree_leaf_labels <- function(model, ...){ + require(tree) + # Uses tree:::treeco to extract data frame of plot locations + xy <- tree:::treeco(model) + label <- model$frame$var + yval <- model$frame$yval + sleft <- model$frame$splits.cutleft + sright <- model$frame$splits.right + + # Lines copied from tree:::treepl + x <- xy$x + y <- xy$y + node = as.numeric(row.names(model$frame)) + parent <- match((node%/%2), node) + sibling <- match(ifelse(node%%2, node - 1L, node + 1L), node) + + data <- data.frame(x, y, label, yval) + data <- data[data$label == "",] + data$label <- round(data$yval, 2) + data +} + + + diff --git a/man/dendro_data.dendrogram.Rd b/man/dendro_data.dendrogram.Rd index dc8e447..3c00b51 100644 --- a/man/dendro_data.dendrogram.Rd +++ b/man/dendro_data.dendrogram.Rd @@ -1,67 +1,67 @@ -\name{dendro_data.dendrogram} -\alias{dendro_data.dendrogram} -\alias{dendro_data.hclust} -\title{Extract line segment and label data from dendrogram or hclust object.} -\usage{ - \method{dendro_data}{dendrogram} (model, - type = c("rectangle", "triangle"), ...) -} -\arguments{ - \item{model}{object of class "dendrogram", e.g. the - output of as.dendrogram()} - - \item{type}{The type of plot, indicating the shape of the - dendrogram. "rectangle" will draw rectangular lines, - while "triangle" will draw triangular lines.} - - \item{...}{ignored} -} -\value{ - A list with the following elements: \item{segments}{Line - segment data} \item{labels}{Label data} -} -\description{ - Extract line segment and label data from dendrogram or - hclust object. Results are stored in a list of data - frames containing line segment data and label data. -} -\examples{ -require(ggplot2) -# -# Demonstrate dendro_data.dendrogram -# -hc <- hclust(dist(USArrests), "ave") -dhc <- as.dendrogram(hc) -# Rectangular lines -ddata <- dendro_data(dhc, type="rectangle") -ggplot(segment(ddata)) + geom_segment(aes(x=x, y=y, xend=xend, yend=yend)) + - coord_flip() + scale_y_reverse(expand=c(0.2, 0)) + theme_dendro() -# Triangular lines -ddata <- dendro_data(dhc, type="triangle") -ggplot(segment(ddata)) + geom_segment(aes(x=x, y=y, xend=xend, yend=yend)) + theme_dendro() -# -# Demonstrate dendro_data.hclust -# -require(ggplot2) -hc <- hclust(dist(USArrests), "ave") -# Rectangular lines -hcdata <- dendro_data(hc, type="rectangle") -ggplot(segment(hcdata)) + geom_segment(aes(x=x, y=y, xend=xend, yend=yend)) + - coord_flip() + scale_y_reverse(expand=c(0.2, 0)) + theme_dendro() -# Triangular lines -hcdata <- dendro_data(hc, type="triangle") -ggplot(segment(hcdata)) + geom_segment(aes(x=x, y=y, xend=xend, yend=yend)) + - theme_dendro() -} -\seealso{ - \code{\link{ggdendrogram}} - - Other dendro_data methods: - \code{\link{dendro_data.rpart}}, - \code{\link{dendro_data.tree}}, - \code{\link{dendrogram_data}}, \code{\link{rpart_labels}} - - Other dendrogram/hclust functions: - \code{\link{dendrogram_data}} -} - +\name{dendro_data.dendrogram} +\alias{dendro_data.dendrogram} +\alias{dendro_data.hclust} +\title{Extract line segment and label data from dendrogram or hclust object.} +\usage{ + \method{dendro_data}{dendrogram} (model, + type = c("rectangle", "triangle"), ...) +} +\arguments{ + \item{model}{object of class "dendrogram", e.g. the + output of as.dendrogram()} + + \item{type}{The type of plot, indicating the shape of the + dendrogram. "rectangle" will draw rectangular lines, + while "triangle" will draw triangular lines.} + + \item{...}{ignored} +} +\value{ + A list with the following elements: \item{segments}{Line + segment data} \item{labels}{Label data} +} +\description{ + Extract line segment and label data from dendrogram or + hclust object. Results are stored in a list of data + frames containing line segment data and label data. +} +\examples{ +require(ggplot2) +# +# Demonstrate dendro_data.dendrogram +# +hc <- hclust(dist(USArrests), "ave") +dhc <- as.dendrogram(hc) +# Rectangular lines +ddata <- dendro_data(dhc, type="rectangle") +ggplot(segment(ddata)) + geom_segment(aes(x=x, y=y, xend=xend, yend=yend)) + + coord_flip() + scale_y_reverse(expand=c(0.2, 0)) + theme_dendro() +# Triangular lines +ddata <- dendro_data(dhc, type="triangle") +ggplot(segment(ddata)) + geom_segment(aes(x=x, y=y, xend=xend, yend=yend)) + theme_dendro() +# +# Demonstrate dendro_data.hclust +# +require(ggplot2) +hc <- hclust(dist(USArrests), "ave") +# Rectangular lines +hcdata <- dendro_data(hc, type="rectangle") +ggplot(segment(hcdata)) + geom_segment(aes(x=x, y=y, xend=xend, yend=yend)) + + coord_flip() + scale_y_reverse(expand=c(0.2, 0)) + theme_dendro() +# Triangular lines +hcdata <- dendro_data(hc, type="triangle") +ggplot(segment(hcdata)) + geom_segment(aes(x=x, y=y, xend=xend, yend=yend)) + + theme_dendro() +} +\seealso{ + \code{\link{ggdendrogram}} + + Other dendro_data methods: + \code{\link{dendro_data.rpart}}, + \code{\link{dendro_data.tree}}, + \code{\link{dendrogram_data}}, \code{\link{rpart_labels}} + + Other dendrogram/hclust functions: + \code{\link{dendrogram_data}} +} + diff --git a/man/dendro_data.rpart.Rd b/man/dendro_data.rpart.Rd index 1dcb43a..5d926f3 100644 --- a/man/dendro_data.rpart.Rd +++ b/man/dendro_data.rpart.Rd @@ -1,48 +1,48 @@ -\name{dendro_data.rpart} -\alias{dendro_data.rpart} -\title{Extract data from classification tree object for plotting using ggplot.} -\usage{ - \method{dendro_data}{rpart} (model, ...) -} -\arguments{ - \item{model}{object of class "tree", e.g. the output of - tree()} - - \item{...}{ignored} -} -\value{ - A list of three data frames: \item{segments}{a data frame - containing the line segment data} \item{labels}{a data - frame containing the label text data} - \item{leaf_labels}{a data frame containing the leaf label - text data} -} -\description{ - Extracts data to plot line segments and labels from a - \code{\link[rpart]{rpart}} classification tree object. - This data can then be manipulated or plotted, e.g. using - \code{\link[ggplot2]{ggplot}}. -} -\examples{ -require(rpart) -require(ggplot2) -fit <- rpart(Kyphosis ~ Age + Number + Start, method="class", data=kyphosis) -fitr <- dendro_data(fit) -ggplot() + - geom_segment(data=fitr$segments, aes(x=x, y=y, xend=xend, yend=yend)) + - geom_text(data=fitr$labels, aes(x=x, y=y, label=label)) + - geom_text(data=fitr$leaf_labels, aes(x=x, y=y, label=label)) + - theme_dendro() -} -\seealso{ - \code{\link{ggdendrogram}} - - Other dendro_data methods: - \code{\link{dendro_data.dendrogram}}, - \code{\link{dendro_data.tree}}, - \code{\link{dendrogram_data}}, \code{\link{rpart_labels}} - - Other rpart functions: \code{\link{rpart_labels}}, - \code{\link{rpart_segments}} -} - +\name{dendro_data.rpart} +\alias{dendro_data.rpart} +\title{Extract data from classification tree object for plotting using ggplot.} +\usage{ + \method{dendro_data}{rpart} (model, ...) +} +\arguments{ + \item{model}{object of class "tree", e.g. the output of + tree()} + + \item{...}{ignored} +} +\value{ + A list of three data frames: \item{segments}{a data frame + containing the line segment data} \item{labels}{a data + frame containing the label text data} + \item{leaf_labels}{a data frame containing the leaf label + text data} +} +\description{ + Extracts data to plot line segments and labels from a + \code{\link[rpart]{rpart}} classification tree object. + This data can then be manipulated or plotted, e.g. using + \code{\link[ggplot2]{ggplot}}. +} +\examples{ +require(rpart) +require(ggplot2) +fit <- rpart(Kyphosis ~ Age + Number + Start, method="class", data=kyphosis) +fitr <- dendro_data(fit) +ggplot() + + geom_segment(data=fitr$segments, aes(x=x, y=y, xend=xend, yend=yend)) + + geom_text(data=fitr$labels, aes(x=x, y=y, label=label)) + + geom_text(data=fitr$leaf_labels, aes(x=x, y=y, label=label)) + + theme_dendro() +} +\seealso{ + \code{\link{ggdendrogram}} + + Other dendro_data methods: + \code{\link{dendro_data.dendrogram}}, + \code{\link{dendro_data.tree}}, + \code{\link{dendrogram_data}}, \code{\link{rpart_labels}} + + Other rpart functions: \code{\link{rpart_labels}}, + \code{\link{rpart_segments}} +} + diff --git a/man/dendro_data.tree.Rd b/man/dendro_data.tree.Rd index 3bf3920..3636f3a 100644 --- a/man/dendro_data.tree.Rd +++ b/man/dendro_data.tree.Rd @@ -1,55 +1,55 @@ -\name{dendro_data.tree} -\alias{dendro_data.tree} -\title{Extract data from regression tree object for plotting using ggplot.} -\usage{ - \method{dendro_data}{tree} (model, ...) -} -\arguments{ - \item{model}{object of class "tree", e.g. the output of - tree()} - - \item{...}{ignored} -} -\value{ - A list of three data frames: \item{segments}{a data frame - containing the line segment data} \item{labels}{a data - frame containing the label text data} - \item{leaf_labels}{a data frame containing the leaf label - text data} -} -\description{ - Extracts data to plot line segments and labels from a - \code{\link[tree]{tree}} object. This data can then be - manipulated or plotted, e.g. using - \code{\link[ggplot2]{ggplot}}. -} -\examples{ -require(tree) -require(ggplot2) -require(MASS) -data(cpus, package="MASS") -cpus.ltr <- tree(log10(perf) ~ syct+mmin+mmax+cach+chmin+chmax, cpus) -tree_data <- dendro_data(cpus.ltr) -ggplot(segment(tree_data)) + - geom_segment(aes(x=x, y=y, xend=xend, yend=yend, size=n), - colour="blue", alpha=0.5) + - scale_size("n") + - geom_text(data=label(tree_data), - aes(x=x, y=y, label=label), vjust=-0.5, size=4) + - geom_text(data=leaf_label(tree_data), - aes(x=x, y=y, label=label), vjust=0.5, size=3) + - theme_dendro() -} -\seealso{ - \code{\link{ggdendrogram}} - - Other dendro_data methods: - \code{\link{dendro_data.dendrogram}}, - \code{\link{dendro_data.rpart}}, - \code{\link{dendrogram_data}}, \code{\link{rpart_labels}} - - Other tree functions: - \code{\link{get_data_tree_leaf_labels}}, - \code{\link{tree_labels}}, \code{\link{tree_segments}} -} - +\name{dendro_data.tree} +\alias{dendro_data.tree} +\title{Extract data from regression tree object for plotting using ggplot.} +\usage{ + \method{dendro_data}{tree} (model, ...) +} +\arguments{ + \item{model}{object of class "tree", e.g. the output of + tree()} + + \item{...}{ignored} +} +\value{ + A list of three data frames: \item{segments}{a data frame + containing the line segment data} \item{labels}{a data + frame containing the label text data} + \item{leaf_labels}{a data frame containing the leaf label + text data} +} +\description{ + Extracts data to plot line segments and labels from a + \code{\link[tree]{tree}} object. This data can then be + manipulated or plotted, e.g. using + \code{\link[ggplot2]{ggplot}}. +} +\examples{ +require(tree) +require(ggplot2) +require(MASS) +data(cpus, package="MASS") +cpus.ltr <- tree(log10(perf) ~ syct+mmin+mmax+cach+chmin+chmax, cpus) +tree_data <- dendro_data(cpus.ltr) +ggplot(segment(tree_data)) + + geom_segment(aes(x=x, y=y, xend=xend, yend=yend, size=n), + colour="blue", alpha=0.5) + + scale_size("n") + + geom_text(data=label(tree_data), + aes(x=x, y=y, label=label), vjust=-0.5, size=4) + + geom_text(data=leaf_label(tree_data), + aes(x=x, y=y, label=label), vjust=0.5, size=3) + + theme_dendro() +} +\seealso{ + \code{\link{ggdendrogram}} + + Other dendro_data methods: + \code{\link{dendro_data.dendrogram}}, + \code{\link{dendro_data.rpart}}, + \code{\link{dendrogram_data}}, \code{\link{rpart_labels}} + + Other tree functions: + \code{\link{get_data_tree_leaf_labels}}, + \code{\link{tree_labels}}, \code{\link{tree_segments}} +} + diff --git a/man/dendrogram_data.Rd b/man/dendrogram_data.Rd index 322387a..84e4cb9 100644 --- a/man/dendrogram_data.Rd +++ b/man/dendrogram_data.Rd @@ -1,35 +1,35 @@ -\name{dendrogram_data} -\alias{dendrogram_data} -\title{Extract data frame from dendrogram object for plotting using ggplot.} -\usage{ - dendrogram_data(x, type = c("rectangle", "triangle"), - ...) -} -\arguments{ - \item{x}{object of class "dendrogram", e.g. the output of - as.dendrogram()} - - \item{type}{The type of plot, indicating the shape of the - dendrogram. "Rectangle" will draw rectangular lines, - while "triangle" will draw triangular lines.} - - \item{...}{ignored} -} -\description{ - Extract data frame from dendrogram object for plotting - using ggplot -} -\seealso{ - \code{\link{ggdendrogram}} - - Other dendro_data methods: - \code{\link{dendro_data.dendrogram}}, - \code{\link{dendro_data.rpart}}, - \code{\link{dendro_data.tree}}, - \code{\link{rpart_labels}} - - Other dendrogram/hclust functions: - \code{\link{dendro_data.dendrogram}} -} -\keyword{internal} - +\name{dendrogram_data} +\alias{dendrogram_data} +\title{Extract data frame from dendrogram object for plotting using ggplot.} +\usage{ + dendrogram_data(x, type = c("rectangle", "triangle"), + ...) +} +\arguments{ + \item{x}{object of class "dendrogram", e.g. the output of + as.dendrogram()} + + \item{type}{The type of plot, indicating the shape of the + dendrogram. "Rectangle" will draw rectangular lines, + while "triangle" will draw triangular lines.} + + \item{...}{ignored} +} +\description{ + Extract data frame from dendrogram object for plotting + using ggplot +} +\seealso{ + \code{\link{ggdendrogram}} + + Other dendro_data methods: + \code{\link{dendro_data.dendrogram}}, + \code{\link{dendro_data.rpart}}, + \code{\link{dendro_data.tree}}, + \code{\link{rpart_labels}} + + Other dendrogram/hclust functions: + \code{\link{dendro_data.dendrogram}} +} +\keyword{internal} + diff --git a/man/get_data_tree_leaf_labels.Rd b/man/get_data_tree_leaf_labels.Rd index 4100211..dea850c 100644 --- a/man/get_data_tree_leaf_labels.Rd +++ b/man/get_data_tree_leaf_labels.Rd @@ -1,24 +1,24 @@ -\name{get_data_tree_leaf_labels} -\alias{get_data_tree_leaf_labels} -\title{Extract labels data frame from tree object for plotting using ggplot.} -\usage{ - get_data_tree_leaf_labels(model, ...) -} -\arguments{ - \item{model}{object of class "tree", e.g. the output of - tree()} - - \item{...}{ignored} -} -\description{ - Extract labels data frame from tree object for plotting - using ggplot -} -\seealso{ - \code{\link{ggdendrogram}} - - Other tree functions: \code{\link{dendro_data.tree}}, - \code{\link{tree_labels}}, \code{\link{tree_segments}} -} -\keyword{internal} - +\name{get_data_tree_leaf_labels} +\alias{get_data_tree_leaf_labels} +\title{Extract labels data frame from tree object for plotting using ggplot.} +\usage{ + get_data_tree_leaf_labels(model, ...) +} +\arguments{ + \item{model}{object of class "tree", e.g. the output of + tree()} + + \item{...}{ignored} +} +\description{ + Extract labels data frame from tree object for plotting + using ggplot +} +\seealso{ + \code{\link{ggdendrogram}} + + Other tree functions: \code{\link{dendro_data.tree}}, + \code{\link{tree_labels}}, \code{\link{tree_segments}} +} +\keyword{internal} + diff --git a/man/ggdendrogram.Rd b/man/ggdendrogram.Rd index db31cb3..0259816 100644 --- a/man/ggdendrogram.Rd +++ b/man/ggdendrogram.Rd @@ -1,48 +1,48 @@ -\name{ggdendrogram} -\alias{ggdendrogram} -\title{Creates dendrogram plot using ggplot.} -\usage{ - ggdendrogram(data, segments = TRUE, labels = TRUE, - leaf_labels = TRUE, rotate = FALSE, - theme_dendro = TRUE, ...) -} -\arguments{ - \item{data}{Either a dendro object or an object that can - be coerced to class dendro using the - \code{\link{dendro_data}} function, i.e. objects of class - dendrogram, hclust or tree} - - \item{segments}{If TRUE, show line segments} - - \item{labels}{if TRUE, shows segment labels} - - \item{leaf_labels}{if TRUE, shows leaf labels} - - \item{rotate}{if TRUE, rotates plot by 90 degrees} - - \item{theme_dendro}{if TRUE, applies a blank theme to - plot (see \code{\link{theme_dendro}})} - - \item{...}{other parameters passed to - \code{\link[ggplot2]{geom_text}}} -} -\value{ - A \code{\link[ggplot2]{ggplot}} object -} -\description{ - Creates dendrogram plot using ggplot. -} -\examples{ -library(ggplot2) -hc <- hclust(dist(USArrests), "ave") -### demonstrate plotting directly from object class hclust -ggdendrogram(hc, rotate=FALSE) -ggdendrogram(hc, rotate=TRUE) -### demonstrate converting hclust to dendro using dendro_data first -hcdata <- dendro_data(hc) -ggdendrogram(hcdata, rotate=TRUE, size=2) + opts(title="Dendrogram in ggplot2") -} -\seealso{ - \code{\link{dendro_data}} -} - +\name{ggdendrogram} +\alias{ggdendrogram} +\title{Creates dendrogram plot using ggplot.} +\usage{ + ggdendrogram(data, segments = TRUE, labels = TRUE, + leaf_labels = TRUE, rotate = FALSE, + theme_dendro = TRUE, ...) +} +\arguments{ + \item{data}{Either a dendro object or an object that can + be coerced to class dendro using the + \code{\link{dendro_data}} function, i.e. objects of class + dendrogram, hclust or tree} + + \item{segments}{If TRUE, show line segments} + + \item{labels}{if TRUE, shows segment labels} + + \item{leaf_labels}{if TRUE, shows leaf labels} + + \item{rotate}{if TRUE, rotates plot by 90 degrees} + + \item{theme_dendro}{if TRUE, applies a blank theme to + plot (see \code{\link{theme_dendro}})} + + \item{...}{other parameters passed to + \code{\link[ggplot2]{geom_text}}} +} +\value{ + A \code{\link[ggplot2]{ggplot}} object +} +\description{ + Creates dendrogram plot using ggplot. +} +\examples{ +library(ggplot2) +hc <- hclust(dist(USArrests), "ave") +### demonstrate plotting directly from object class hclust +ggdendrogram(hc, rotate=FALSE) +ggdendrogram(hc, rotate=TRUE) +### demonstrate converting hclust to dendro using dendro_data first +hcdata <- dendro_data(hc) +ggdendrogram(hcdata, rotate=TRUE, size=2) + opts(title="Dendrogram in ggplot2") +} +\seealso{ + \code{\link{dendro_data}} +} + diff --git a/man/rpart_labels.Rd b/man/rpart_labels.Rd index 9a0d373..069e6a8 100644 --- a/man/rpart_labels.Rd +++ b/man/rpart_labels.Rd @@ -1,36 +1,36 @@ -\name{rpart_labels} -\alias{rpart_labels} -\title{Extract labels data frame from rpart object for plotting using ggplot.} -\usage{ - rpart_labels(model, splits = TRUE, label, FUN = text, - all = FALSE, pretty = NULL, - digits = getOption("digits") - 3, use.n = FALSE, - fancy = FALSE, fwidth = 0.8, fheight = 0.8, ...) -} -\arguments{ - \item{model}{object of class "rpart", e.g. the output of - rpart()} - - \item{...}{ignored} -} -\value{ - a list with two elements: $labels and $leaf_labels -} -\description{ - Extract labels data frame from rpart object for plotting - using ggplot. -} -\seealso{ - \code{\link{ggdendrogram}} - - Other dendro_data methods: - \code{\link{dendro_data.dendrogram}}, - \code{\link{dendro_data.rpart}}, - \code{\link{dendro_data.tree}}, - \code{\link{dendrogram_data}} - - Other rpart functions: \code{\link{dendro_data.rpart}}, - \code{\link{rpart_segments}} -} -\keyword{internal} - +\name{rpart_labels} +\alias{rpart_labels} +\title{Extract labels data frame from rpart object for plotting using ggplot.} +\usage{ + rpart_labels(model, splits = TRUE, label, FUN = text, + all = FALSE, pretty = NULL, + digits = getOption("digits") - 3, use.n = FALSE, + fancy = FALSE, fwidth = 0.8, fheight = 0.8, ...) +} +\arguments{ + \item{model}{object of class "rpart", e.g. the output of + rpart()} + + \item{...}{ignored} +} +\value{ + a list with two elements: $labels and $leaf_labels +} +\description{ + Extract labels data frame from rpart object for plotting + using ggplot. +} +\seealso{ + \code{\link{ggdendrogram}} + + Other dendro_data methods: + \code{\link{dendro_data.dendrogram}}, + \code{\link{dendro_data.rpart}}, + \code{\link{dendro_data.tree}}, + \code{\link{dendrogram_data}} + + Other rpart functions: \code{\link{dendro_data.rpart}}, + \code{\link{rpart_segments}} +} +\keyword{internal} + diff --git a/man/rpart_segments.Rd b/man/rpart_segments.Rd index 0d717c8..a582bb6 100644 --- a/man/rpart_segments.Rd +++ b/man/rpart_segments.Rd @@ -1,24 +1,24 @@ -\name{rpart_segments} -\alias{rpart_segments} -\title{Extract data frame from rpart object for plotting using ggplot.} -\usage{ - rpart_segments(model, ...) -} -\arguments{ - \item{model}{object of class "tree", e.g. the output of - tree()} - - \item{...}{ignored} -} -\description{ - Extract data frame from rpart object for plotting using - ggplot. -} -\seealso{ - \code{\link{ggdendrogram}} - - Other rpart functions: \code{\link{dendro_data.rpart}}, - \code{\link{rpart_labels}} -} -\keyword{internal} - +\name{rpart_segments} +\alias{rpart_segments} +\title{Extract data frame from rpart object for plotting using ggplot.} +\usage{ + rpart_segments(model, ...) +} +\arguments{ + \item{model}{object of class "tree", e.g. the output of + tree()} + + \item{...}{ignored} +} +\description{ + Extract data frame from rpart object for plotting using + ggplot. +} +\seealso{ + \code{\link{ggdendrogram}} + + Other rpart functions: \code{\link{dendro_data.rpart}}, + \code{\link{rpart_labels}} +} +\keyword{internal} + diff --git a/man/theme_dendro.Rd b/man/theme_dendro.Rd index 9f66b62..3ab2a42 100644 --- a/man/theme_dendro.Rd +++ b/man/theme_dendro.Rd @@ -1,13 +1,13 @@ -\name{theme_dendro} -\alias{theme_dendro} -\title{Creates completely blank theme in ggplot.} -\usage{ - theme_dendro() -} -\description{ - Sets most of the \code{ggplot} options to blank, by - returning blank \code{opts} for the panel grid, panel - background, axis title, axis text, axis line and axis - ticks. -} - +\name{theme_dendro} +\alias{theme_dendro} +\title{Creates completely blank theme in ggplot.} +\usage{ + theme_dendro() +} +\description{ + Sets most of the \code{ggplot} options to blank, by + returning blank \code{opts} for the panel grid, panel + background, axis title, axis text, axis line and axis + ticks. +} + diff --git a/man/tree_labels.Rd b/man/tree_labels.Rd index d335177..e4c6fc4 100644 --- a/man/tree_labels.Rd +++ b/man/tree_labels.Rd @@ -1,28 +1,28 @@ -\name{tree_labels} -\alias{tree_labels} -\title{Extract labels data frame from tree object for plotting using ggplot.} -\usage{ - tree_labels(model, ...) -} -\arguments{ - \item{model}{object of class "tree", e.g. the output of - tree()} - - \item{...}{ignored} -} -\value{ - a list with two elements: $labels and $leaf_labels -} -\description{ - Extract labels data frame from tree object for plotting - using ggplot. -} -\seealso{ - \code{\link{ggdendrogram}} - - Other tree functions: \code{\link{dendro_data.tree}}, - \code{\link{get_data_tree_leaf_labels}}, - \code{\link{tree_segments}} -} -\keyword{internal} - +\name{tree_labels} +\alias{tree_labels} +\title{Extract labels data frame from tree object for plotting using ggplot.} +\usage{ + tree_labels(model, ...) +} +\arguments{ + \item{model}{object of class "tree", e.g. the output of + tree()} + + \item{...}{ignored} +} +\value{ + a list with two elements: $labels and $leaf_labels +} +\description{ + Extract labels data frame from tree object for plotting + using ggplot. +} +\seealso{ + \code{\link{ggdendrogram}} + + Other tree functions: \code{\link{dendro_data.tree}}, + \code{\link{get_data_tree_leaf_labels}}, + \code{\link{tree_segments}} +} +\keyword{internal} + diff --git a/man/tree_segments.Rd b/man/tree_segments.Rd index 43e8d31..e7008d0 100644 --- a/man/tree_segments.Rd +++ b/man/tree_segments.Rd @@ -1,25 +1,25 @@ -\name{tree_segments} -\alias{tree_segments} -\title{Extract data frame from tree object for plotting using ggplot.} -\usage{ - tree_segments(model, ...) -} -\arguments{ - \item{model}{object of class "tree", e.g. the output of - tree()} - - \item{...}{ignored} -} -\description{ - Extract data frame from tree object for plotting using - ggplot. -} -\seealso{ - \code{\link{ggdendrogram}} - - Other tree functions: \code{\link{dendro_data.tree}}, - \code{\link{get_data_tree_leaf_labels}}, - \code{\link{tree_labels}} -} -\keyword{internal} - +\name{tree_segments} +\alias{tree_segments} +\title{Extract data frame from tree object for plotting using ggplot.} +\usage{ + tree_segments(model, ...) +} +\arguments{ + \item{model}{object of class "tree", e.g. the output of + tree()} + + \item{...}{ignored} +} +\description{ + Extract data frame from tree object for plotting using + ggplot. +} +\seealso{ + \code{\link{ggdendrogram}} + + Other tree functions: \code{\link{dendro_data.tree}}, + \code{\link{get_data_tree_leaf_labels}}, + \code{\link{tree_labels}} +} +\keyword{internal} + diff --git a/vignettes/ggdendro.Rnw b/vignettes/ggdendro.Rnw index 2cc2eb6..f2f79a5 100644 --- a/vignettes/ggdendro.Rnw +++ b/vignettes/ggdendro.Rnw @@ -1,212 +1,212 @@ - -% \VignetteIndexEntry{Plotting dendrograms and tree diagrams with ggplot} -% \VignettePackage{ggdendro} -% \VignetteKeyword{dendrogram} -% \VignetteKeyword{ggplot} - - -% Definitions -\newcommand{\ggdendro}{\texttt{ggdendro}} -\newcommand{\dendrodata}{\texttt{dendro\_data}} -\newcommand{\code}[1]{\texttt{#1}} -\newcommand{\ggplot}{\texttt{ggplot}} - -\documentclass[10pt,oneside]{article} - -\begin{document} -\pagestyle{empty} - -\setlength{\baselineskip}{1.25em} -\setlength{\parskip}{0.5em} -\setlength{\parindent}{0.0em} - -%\begin{titlepage} -\title{Using the \ggdendro{} package for plotting dendrograms and tree diagrams} -\author{Andrie de Vries} -%\end{titlepage} -\maketitle{} - -\ggdendro{} is a package that makes it easy to extract dendrogram and tree diagrams into a data frame. - -\section{Introduction} - -The \ggdendro{} package provides a general framework to extract the plot data for a dendrograms and tree diagrams. - -It does this by providing generic function \dendrodata{} that will extract the appropriate segment data as well as labels. This data is returned as a list of data.frames. These data frames can be extracted using three accessor functions: - -\begin{itemize} -\item \code{segment} -\item \code{label} -\item \code{leaf\_label} -\end{itemize} - -The package also provides two convenient wrapper functions: - -\begin{itemize} -\item\code{ggdendrogram} is a wrapper around \ggplot{} to create a dendrogram using a single line of code. The resulting object is of class \ggplot{}, so can be manipulated using the \ggplot{} tools. -\item\code{theme\_dendro} is a \ggplot{} theme with a blank canvas, i.e. no axes, axis labels or tick marks. -\end{itemize} - -The \code{ggplot} package doesn't get loaded automatically, so remember to load it first: - -<>= -library(ggplot2) -library(ggdendro) -@ - -%------------------------------------------------------------------------------ -\section{Using the \code{ggdendrogram} wrapper} - -The \ggdendro{} package will extract all of the plot data from dendrogram objects. Sometimes it is useful to have fine-grained control over the plot. Other times it might be more convenient to have a simple wrapper around \code{ggplot} to produce a dendrogram with a small amount of code. - -The function \code{ggdendrogram} provides such a wrapper to produce a plot with a single line of code. It provides a few options for controlling the display of line segments, labels and plot rotation (rotated by 90 degrees or not). - -<>= -hc <- hclust(dist(USArrests), "ave") -p <- ggdendrogram(hc, rotate=FALSE, size=2) -print(p) -@ - -\begin{figure}[h] -\begin{center} -\includegraphics[width=4in, height=2in]{ggdendro-dendrogram} -\end{center} -\caption{A dendrogram produced using \code{ggdendrogram}} -\end{figure} - -The next section shows how to take full control over the data extraction and subsequent plotting. - -%------------------------------------------------------------------------------ -\section{Extracting the dendrogram plot data using \dendrodata{}} - -The \code{hclust} and \code{dendrogram} functions in R makes it easy to plot the results of hierarchical cluster analysis and other dendrograms in R. However, it is hard to extract the data from this analysis to customise these plots, since the \code{plot} functions for both these classes prints directly without the option of returning the plot data. - -<>= -hc <- hclust(dist(USArrests), "ave") -dhc <- as.dendrogram(hc) -# Rectangular lines -ddata <- dendro_data(dhc, type="rectangle") -p <- ggplot(segment(ddata)) + - geom_segment(aes(x=x, y=y, xend=xend, yend=yend)) + - coord_flip() + scale_y_reverse(expand=c(0.2, 0)) -print(p) -@ - -\begin{figure}[h] -\begin{center} -\includegraphics[width=4in, height=2in]{ggdendro-dendro1} -\end{center} -\caption{A dendrogram produced using \dendrodata{} and \code{ggplot}} -\end{figure} - - -Of course, using ggplot to create the dendrogram means one has full control over the appearance of the plot. For example, here is the same data, but this time plotted horizontally with a clean background. In \code{ggplot} this means passing a number of options to \code{opts}. \ggdendro{} has a convenient function, \code{theme\_dendro} that wraps these options into a convenient function. - -<>= -p <- p + coord_flip() + theme_dendro() -print(p) -@ - -\begin{figure}[h] -\begin{center} -\includegraphics[width=4in, height=2in]{ggdendro-dendro2} -\end{center} -\caption{Dendrogram rotated on clear background} -\end{figure} - -Dendrograms can also be drawn using triangular lines instead of rectangular lines. For example: - -<>= -ddata <- dendro_data(dhc, type="triangle") -p <- ggplot(segment(ddata)) + - geom_segment(aes(x=x, y=y, xend=xend, yend=yend)) + - coord_flip() + scale_y_reverse(expand=c(0.2, 0)) + - theme_dendro() -print(p) -@ - -\begin{figure}[h] -\begin{center} -\includegraphics[width=4in, height=2in]{ggdendro-dendro3} -\end{center} -\caption{A dendrogram with triangular connection lines} -\end{figure} - - - -%------------------------------------------------------------------------------ -\section{Regression tree diagrams} - -The \code{tree} function in package \code{tree} creates tree diagrams. To extract the plot data for these diagrams using \ggdendro{} follows the same basic pattern as dendrograms: - -<>= -require(tree) -data(cpus, package="MASS") -cpus.ltr <- tree(log10(perf) ~ - syct+mmin+mmax+cach+chmin+chmax, cpus) -tree_data <- dendro_data(cpus.ltr) -p <- ggplot(segment(tree_data)) + - geom_segment(aes(x=x, y=y, xend=xend, yend=yend, size=n), - colour="blue", alpha=0.5) + - scale_size("n") + - geom_text(data=label(tree_data), - aes(x=x, y=y, label=label), vjust=-0.5, size=3) + - geom_text(data=leaf_label(tree_data), - aes(x=x, y=y, label=label), vjust=0.5, size=2) + - theme_dendro() -print(p) -@ - -\begin{figure}[h] -\begin{center} -\includegraphics[width=4in, height=2.5in]{ggdendro-tree1} -\end{center} -\caption{Regression tree plot} -\end{figure} - - -%------------------------------------------------------------------------------ -\section{Classification tree diagrams} - -The \code{rpart} function in package \code{rpart} creates classification diagrams. To extract the plot data for these diagrams using \ggdendro{} follows the same basic pattern as dendrograms: - -<>= -library(rpart) -fit <- rpart(Kyphosis ~ Age + Number + Start, - method="class", data=kyphosis) -fitr <- dendro_data(fit) -p <- ggplot() + - geom_segment(data=fitr$segments, - aes(x=x, y=y, xend=xend, yend=yend)) + - geom_text(data=fitr$labels, - aes(x=x, y=y, label=label), size=3, vjust=0) + - geom_text(data=fitr$leaf_labels, - aes(x=x, y=y, label=label), size=3, vjust=1) + - theme_dendro() -print(p) -@ - -\begin{figure}[h] -\begin{center} -\includegraphics[width=4in, height=2.5in]{ggdendro-rpart1} -\end{center} -\caption{Classification tree plot} -\end{figure} - - -%------------------------------------------------------------------------------ -\section{Conclusion} - -The \ggdendro{} package makes it easy to extract the line segment and label data from hclust, dendrogram and tree objects. - - -% Start a new page -% Not echoed, not evaluated -% ONLY here for checkVignettes so that all output doesn't -% end up on one enormous page - -\end{document} - - - - + +% \VignetteIndexEntry{Plotting dendrograms and tree diagrams with ggplot} +% \VignettePackage{ggdendro} +% \VignetteKeyword{dendrogram} +% \VignetteKeyword{ggplot} + + +% Definitions +\newcommand{\ggdendro}{\texttt{ggdendro}} +\newcommand{\dendrodata}{\texttt{dendro\_data}} +\newcommand{\code}[1]{\texttt{#1}} +\newcommand{\ggplot}{\texttt{ggplot}} + +\documentclass[10pt,oneside]{article} + +\begin{document} +\pagestyle{empty} + +\setlength{\baselineskip}{1.25em} +\setlength{\parskip}{0.5em} +\setlength{\parindent}{0.0em} + +%\begin{titlepage} +\title{Using the \ggdendro{} package for plotting dendrograms and tree diagrams} +\author{Andrie de Vries} +%\end{titlepage} +\maketitle{} + +\ggdendro{} is a package that makes it easy to extract dendrogram and tree diagrams into a data frame. + +\section{Introduction} + +The \ggdendro{} package provides a general framework to extract the plot data for a dendrograms and tree diagrams. + +It does this by providing generic function \dendrodata{} that will extract the appropriate segment data as well as labels. This data is returned as a list of data.frames. These data frames can be extracted using three accessor functions: + +\begin{itemize} +\item \code{segment} +\item \code{label} +\item \code{leaf\_label} +\end{itemize} + +The package also provides two convenient wrapper functions: + +\begin{itemize} +\item\code{ggdendrogram} is a wrapper around \ggplot{} to create a dendrogram using a single line of code. The resulting object is of class \ggplot{}, so can be manipulated using the \ggplot{} tools. +\item\code{theme\_dendro} is a \ggplot{} theme with a blank canvas, i.e. no axes, axis labels or tick marks. +\end{itemize} + +The \code{ggplot} package doesn't get loaded automatically, so remember to load it first: + +<>= +library(ggplot2) +library(ggdendro) +@ + +%------------------------------------------------------------------------------ +\section{Using the \code{ggdendrogram} wrapper} + +The \ggdendro{} package will extract all of the plot data from dendrogram objects. Sometimes it is useful to have fine-grained control over the plot. Other times it might be more convenient to have a simple wrapper around \code{ggplot} to produce a dendrogram with a small amount of code. + +The function \code{ggdendrogram} provides such a wrapper to produce a plot with a single line of code. It provides a few options for controlling the display of line segments, labels and plot rotation (rotated by 90 degrees or not). + +<>= +hc <- hclust(dist(USArrests), "ave") +p <- ggdendrogram(hc, rotate=FALSE, size=2) +print(p) +@ + +\begin{figure}[h] +\begin{center} +\includegraphics[width=4in, height=2in]{ggdendro-dendrogram} +\end{center} +\caption{A dendrogram produced using \code{ggdendrogram}} +\end{figure} + +The next section shows how to take full control over the data extraction and subsequent plotting. + +%------------------------------------------------------------------------------ +\section{Extracting the dendrogram plot data using \dendrodata{}} + +The \code{hclust} and \code{dendrogram} functions in R makes it easy to plot the results of hierarchical cluster analysis and other dendrograms in R. However, it is hard to extract the data from this analysis to customise these plots, since the \code{plot} functions for both these classes prints directly without the option of returning the plot data. + +<>= +hc <- hclust(dist(USArrests), "ave") +dhc <- as.dendrogram(hc) +# Rectangular lines +ddata <- dendro_data(dhc, type="rectangle") +p <- ggplot(segment(ddata)) + + geom_segment(aes(x=x, y=y, xend=xend, yend=yend)) + + coord_flip() + scale_y_reverse(expand=c(0.2, 0)) +print(p) +@ + +\begin{figure}[h] +\begin{center} +\includegraphics[width=4in, height=2in]{ggdendro-dendro1} +\end{center} +\caption{A dendrogram produced using \dendrodata{} and \code{ggplot}} +\end{figure} + + +Of course, using ggplot to create the dendrogram means one has full control over the appearance of the plot. For example, here is the same data, but this time plotted horizontally with a clean background. In \code{ggplot} this means passing a number of options to \code{opts}. \ggdendro{} has a convenient function, \code{theme\_dendro} that wraps these options into a convenient function. + +<>= +p <- p + coord_flip() + theme_dendro() +print(p) +@ + +\begin{figure}[h] +\begin{center} +\includegraphics[width=4in, height=2in]{ggdendro-dendro2} +\end{center} +\caption{Dendrogram rotated on clear background} +\end{figure} + +Dendrograms can also be drawn using triangular lines instead of rectangular lines. For example: + +<>= +ddata <- dendro_data(dhc, type="triangle") +p <- ggplot(segment(ddata)) + + geom_segment(aes(x=x, y=y, xend=xend, yend=yend)) + + coord_flip() + scale_y_reverse(expand=c(0.2, 0)) + + theme_dendro() +print(p) +@ + +\begin{figure}[h] +\begin{center} +\includegraphics[width=4in, height=2in]{ggdendro-dendro3} +\end{center} +\caption{A dendrogram with triangular connection lines} +\end{figure} + + + +%------------------------------------------------------------------------------ +\section{Regression tree diagrams} + +The \code{tree} function in package \code{tree} creates tree diagrams. To extract the plot data for these diagrams using \ggdendro{} follows the same basic pattern as dendrograms: + +<>= +require(tree) +data(cpus, package="MASS") +cpus.ltr <- tree(log10(perf) ~ + syct+mmin+mmax+cach+chmin+chmax, cpus) +tree_data <- dendro_data(cpus.ltr) +p <- ggplot(segment(tree_data)) + + geom_segment(aes(x=x, y=y, xend=xend, yend=yend, size=n), + colour="blue", alpha=0.5) + + scale_size("n") + + geom_text(data=label(tree_data), + aes(x=x, y=y, label=label), vjust=-0.5, size=3) + + geom_text(data=leaf_label(tree_data), + aes(x=x, y=y, label=label), vjust=0.5, size=2) + + theme_dendro() +print(p) +@ + +\begin{figure}[h] +\begin{center} +\includegraphics[width=4in, height=2.5in]{ggdendro-tree1} +\end{center} +\caption{Regression tree plot} +\end{figure} + + +%------------------------------------------------------------------------------ +\section{Classification tree diagrams} + +The \code{rpart} function in package \code{rpart} creates classification diagrams. To extract the plot data for these diagrams using \ggdendro{} follows the same basic pattern as dendrograms: + +<>= +library(rpart) +fit <- rpart(Kyphosis ~ Age + Number + Start, + method="class", data=kyphosis) +fitr <- dendro_data(fit) +p <- ggplot() + + geom_segment(data=fitr$segments, + aes(x=x, y=y, xend=xend, yend=yend)) + + geom_text(data=fitr$labels, + aes(x=x, y=y, label=label), size=3, vjust=0) + + geom_text(data=fitr$leaf_labels, + aes(x=x, y=y, label=label), size=3, vjust=1) + + theme_dendro() +print(p) +@ + +\begin{figure}[h] +\begin{center} +\includegraphics[width=4in, height=2.5in]{ggdendro-rpart1} +\end{center} +\caption{Classification tree plot} +\end{figure} + + +%------------------------------------------------------------------------------ +\section{Conclusion} + +The \ggdendro{} package makes it easy to extract the line segment and label data from hclust, dendrogram and tree objects. + + +% Start a new page +% Not echoed, not evaluated +% ONLY here for checkVignettes so that all output doesn't +% end up on one enormous page + +\end{document} + + + +