Skip to content
This repository
Fetching contributors…

Cannot retrieve contributors at this time

file 146 lines (123 sloc) 5.446 kb
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146
# StatSmooth: statistic that fits a smoother to y ~ x within each group and
# evaluates the fitted model at a sequence of x values via predictdf().
StatSmooth <- proto(Stat, {
  # Guard applied before the per-group calculation.
  #
  # Counts the distinct x values in every group. When there is more than one
  # group and each of them has exactly one distinct x value, no smooth can be
  # fitted: a hint is emitted and an empty data frame is returned. Otherwise
  # the call is delegated to the parent's calculate_groups().
  calculate_groups <- function(., data, scales, ...) {
    rows <- daply(data, .(group), function(df) length(unique(df$x)))
    
    if (all(rows == 1) && length(rows) > 1) {
      # message() concatenates its arguments without a separator, so the
      # space between the two sentences has to be part of the text.
      message("geom_smooth: Only one unique x value each group. ",
        "Maybe you want aes(group = 1)?")
      return(data.frame())
    }
    
    .super$calculate_groups(., data, scales, ...)
  }
  
  # Fit the smoother for a single group and return its predictions.
  #
  # data      data frame with x and y (and optionally weight) columns
  # scales    scales object; only scales$x is consulted, and only when
  #           fullrange is TRUE
  # method    "auto", the name of a fitting function, or the function itself
  # formula   model formula handed to the fitting function
  # se        forwarded to predictdf()
  # n         number of evaluation points for non-integer x
  # fullrange evaluate over the scale's range instead of the data's range
  # xseq      explicit evaluation points; when supplied, n and fullrange are
  #           not used
  # level     forwarded to predictdf()
  # na.rm     forwarded to remove_missing()
  # ...       extra arguments for the fitting function; safe.call() is given
  #           the names of the fitting function's formals alongside them
  #
  # Returns the result of predictdf(), or an empty data frame when there are
  # fewer than two distinct x values.
  calculate <- function(., data, scales, method = "auto", formula = y ~ x,
                        se = TRUE, n = 80, fullrange = FALSE, xseq = NULL,
                        level = 0.95, na.rm = FALSE, ...) {
    data <- remove_missing(data, na.rm, c("x", "y"), name="stat_smooth")
    # Two distinct x values are enough for the simplest (straight line) fit,
    # so only bail out when there are fewer than two.
    if (length(unique(data$x)) < 2) {
      # Not enough data to perform fit
      return(data.frame())
    }
    
    # Figure out what type of smoothing to do: loess for small datasets,
    # gam with a cubic regression basis for large data
    if (is.character(method) && method == "auto") {
      if (nrow(data) < 1000) {
        method <- "loess"
      } else {
        try_require("mgcv")
        method <- gam
        formula <- y ~ s(x, bs = "cs")
      }
    }
    
    # Default to equal weights when no weight column was supplied.
    if (is.null(data$weight)) data$weight <- 1
    
    if (is.null(xseq)) {
      if (is.integer(data$x)) {
        # Integer x: evaluate at the observed (or scale-provided) values only.
        if (fullrange) {
          xseq <- scales$x$input_set()
        } else {
          xseq <- sort(unique(data$x))
        }
      } else {
        # Otherwise evaluate on an evenly spaced grid of n points.
        if (fullrange) {
          x_range <- scales$x$output_set()
        } else {
          x_range <- range(data$x, na.rm = TRUE)
        }
        xseq <- seq(x_range[1], x_range[2], length.out = n)
      }
    }
    if (is.character(method)) method <- match.fun(method)
    
    # Wrapper that fixes formula, data and weights so that safe.call() only
    # has to deal with the user-supplied extras.
    method.special <- function(...)
      method(formula, data=data, weights=weight, ...)
    model <- safe.call(method.special, list(...), names(formals(method)))
    
    predictdf(model, xseq, se, level)
  }
  
  objname <- "smooth"
  desc <- "Add a smoother"
  details <- "Aids the eye in seeing patterns in the presence of overplotting."
  icon <- function(.) GeomSmooth$icon()
  
  required_aes <- c("x", "y")
  default_geom <- function(.) GeomSmooth
  desc_params <- list(
    method = "smoothing method (function) to use, eg. lm, glm, gam, loess, rlm",
    formula = "formula to use in smoothing function, eg. y ~ x, y ~ poly(x, 2), y ~ log(x)",
    se = "display confidence interval around smooth? (true by default, see level to control)",
    fullrange = "should the fit span the full range of the plot, or just the data",
    level = "level of confidence interval to use (0.95 by default)",
    n = "number of points to evaluate smoother at",
    xseq = "exact points to evaluate smooth at, overrides n",
    "..." = "other arguments are passed to smoothing function"
  )
  desc_outputs <- list(
    "y" = "predicted value",
    "ymin" = "lower pointwise confidence interval around the mean",
    "ymax" = "upper pointwise confidence interval around the mean",
    "se" = "standard error"
  )
  
  seealso <- list(
    lm = "for linear smooths",
    glm = "for generalised linear smooths",
    loess = "for local smooths"
  )
  
  examples <- function(.) {
    c <- ggplot(mtcars, aes(qsec, wt))
    c + stat_smooth()
    c + stat_smooth() + geom_point()

    # Adjust parameters
    c + stat_smooth(se = FALSE) + geom_point()

    c + stat_smooth(span = 0.9) + geom_point()
    c + stat_smooth(method = "lm") + geom_point()
    
    library(splines)
    c + stat_smooth(method = "lm", formula = y ~ ns(x,3)) +
      geom_point()
    c + stat_smooth(method = MASS::rlm, formula= y ~ ns(x,3)) + geom_point()
    
    # The default confidence band uses a transparent colour.
    # This currently only works on a limited number of graphics devices
    # (including Quartz, PDF, and Cairo) so you may need to set the
    # fill colour to a opaque colour, as shown below
    c + stat_smooth(fill = "grey50", size = 2, alpha = 1)
    c + stat_smooth(fill = "blue", size = 2, alpha = 1)
    
    # The colour of the line can be controlled with the colour aesthetic
    c + stat_smooth(fill="blue", colour="darkblue", size=2)
    c + stat_smooth(fill="blue", colour="darkblue", size=2, alpha = 0.2)
    c + geom_point() +
      stat_smooth(fill="blue", colour="darkblue", size=2, alpha = 0.2)
    
    # Smoothers for subsets
    c <- ggplot(mtcars, aes(y=wt, x=mpg)) + facet_grid(. ~ cyl)
    c + stat_smooth(method=lm) + geom_point()
    c + stat_smooth(method=lm, fullrange=TRUE) + geom_point()
    
    # Geoms and stats are automatically split by aesthetics that are factors
    c <- ggplot(mtcars, aes(y=wt, x=mpg, colour=factor(cyl)))
    c + stat_smooth(method=lm) + geom_point()
    c + stat_smooth(method=lm, aes(fill = factor(cyl))) + geom_point()
    c + stat_smooth(method=lm, fullrange=TRUE, alpha = 0.1) + geom_point()

    # Use qplot instead
    qplot(qsec, wt, data=mtcars, geom=c("smooth", "point"))
    
    # Example with logistic regression
    data("kyphosis", package="rpart")
    qplot(Age, Kyphosis, data=kyphosis)
    qplot(Age, data=kyphosis, facets = . ~ Kyphosis, binwidth = 10)
    qplot(Age, Kyphosis, data=kyphosis, position="jitter")
    qplot(Age, Kyphosis, data=kyphosis, position=position_jitter(y=5))

    qplot(Age, as.numeric(Kyphosis) - 1, data = kyphosis) +
      stat_smooth(method="glm", family="binomial")
    qplot(Age, as.numeric(Kyphosis) - 1, data=kyphosis) +
      stat_smooth(method="glm", family="binomial", formula = y ~ ns(x, 2))
    
  }
})
Something went wrong with that request. Please try again.