From ab47cf1978ec11766f57ea157ae30b1a94b7c9a8 Mon Sep 17 00:00:00 2001 From: Boris Leroy Date: Thu, 25 Sep 2014 00:00:00 +0000 Subject: [PATCH] version 1.0 --- DESCRIPTION | 20 ++ MD5 | 29 ++ NAMESPACE | 19 ++ R/convertToPA.R | 556 +++++++++++++++++++++++++++++++++ R/formatFunctions.R | 94 ++++++ R/generateRandomSp.R | 287 +++++++++++++++++ R/generateSpFromFun.R | 268 ++++++++++++++++ R/generateSpFromPCA.R | 299 ++++++++++++++++++ R/genericfunctions.R | 145 +++++++++ R/limitDistribution.R | 254 +++++++++++++++ R/plotResponse.R | 372 ++++++++++++++++++++++ R/removeCollinearity.R | 163 ++++++++++ R/sampleOccurrences.R | 562 ++++++++++++++++++++++++++++++++++ R/sp.env.functions.R | 108 +++++++ R/synchroniseNA.R | 49 +++ R/virtualspecies-package.R | 64 ++++ man/convertToPA.Rd | 160 ++++++++++ man/formatFunctions.Rd | 74 +++++ man/generateRandomSp.Rd | 144 +++++++++ man/generateSpFromFun.Rd | 137 +++++++++ man/generateSpFromPCA.Rd | 142 +++++++++ man/limitDistribution.Rd | 127 ++++++++ man/linearFun.Rd | 35 +++ man/logisticFun.Rd | 59 ++++ man/plotResponse.Rd | 93 ++++++ man/quadraticFun.Rd | 38 +++ man/removeCollinearity.Rd | 88 ++++++ man/sampleOccurrences.Rd | 218 +++++++++++++ man/synchroniseNA.Rd | 40 +++ man/virtualspecies-package.Rd | 67 ++++ 30 files changed, 4711 insertions(+) create mode 100644 DESCRIPTION create mode 100644 MD5 create mode 100644 NAMESPACE create mode 100644 R/convertToPA.R create mode 100644 R/formatFunctions.R create mode 100644 R/generateRandomSp.R create mode 100644 R/generateSpFromFun.R create mode 100644 R/generateSpFromPCA.R create mode 100644 R/genericfunctions.R create mode 100644 R/limitDistribution.R create mode 100644 R/plotResponse.R create mode 100644 R/removeCollinearity.R create mode 100644 R/sampleOccurrences.R create mode 100644 R/sp.env.functions.R create mode 100644 R/synchroniseNA.R create mode 100644 R/virtualspecies-package.R create mode 100644 man/convertToPA.Rd create mode 100644 man/formatFunctions.Rd create mode 100644 man/generateRandomSp.Rd create mode 100644 man/generateSpFromFun.Rd create mode 100644 man/generateSpFromPCA.Rd create mode 100644 man/limitDistribution.Rd create mode 100644 man/linearFun.Rd create mode 100644 man/logisticFun.Rd create mode 100644 man/plotResponse.Rd create mode 100644 man/quadraticFun.Rd create mode 100644 man/removeCollinearity.Rd create mode 100644 man/sampleOccurrences.Rd create mode 100644 man/synchroniseNA.Rd create mode 100644 man/virtualspecies-package.Rd diff --git a/DESCRIPTION b/DESCRIPTION new file mode 100644 index 0000000..e0a2e9c --- /dev/null +++ b/DESCRIPTION @@ -0,0 +1,20 @@ +Package: virtualspecies +Type: Package +Title: Generation of Virtual Species Distributions +Version: 1.0 +Date: 2014-09-25 +Author: Boris Leroy with help from C. N. Meynard, C. Bellard & F. Courchamp +Maintainer: Boris Leroy +Description: Provides a framework for generating virtual species distributions, + a procedure increasingly used in ecology to improve species distribution models. + This package integrates the existing methodological approaches with the + objective of generating virtual species distributions with increased + ecological realism. +License: GPL (>= 2.0) +Depends: raster +Imports: ade4, dismo, rworldmap +URL: http://borisleroy.com/virtualspecies +Packaged: 2014-09-25 17:20:12 UTC; Farewell +NeedsCompilation: no +Repository: CRAN +Date/Publication: 2014-09-25 23:05:19 diff --git a/MD5 b/MD5 new file mode 100644 index 0000000..a90b932 --- /dev/null +++ b/MD5 @@ -0,0 +1,29 @@ +2488fdb6e1a16af69e02c4800d305a88 *DESCRIPTION +e7cbe424bac71c236e5f4787fc20197c *NAMESPACE +954b9a3a333726b17d5f5c3d55721591 *R/convertToPA.R +4a106719d75bfe1d580c2233bb714298 *R/formatFunctions.R +4dcc57b7066fc7e647570c04c655fff7 *R/generateRandomSp.R +d65f5c4c10b3a9323692174fe5ba876f *R/generateSpFromFun.R +73dca96770fd7a5afec125b28bfcb0db *R/generateSpFromPCA.R +9ce496e0827cbbf3130c55afdee3414a *R/genericfunctions.R +db6b370681b2e287df03b32a8697d6f6 *R/limitDistribution.R +cb1b25031ba3c4fc6ccbbad0212742c1 *R/plotResponse.R +bb34270300858ddb9e428c2ec4b40c09 *R/removeCollinearity.R +a86ac7920d89e641da1cf3e7690f6fac *R/sampleOccurrences.R +2faf926452812233665e02d624967dbd *R/sp.env.functions.R +b15899277537256370881dc1485c9fd0 *R/synchroniseNA.R +10f7fd2cb9617d5798ca620b4c15e4c9 *R/virtualspecies-package.R +889b7f8474de56ba9e6fe786af69807d *man/convertToPA.Rd +4375ef8e8187ab10c21fe2926127a6c2 *man/formatFunctions.Rd +6806310841dbe793a3264c941dd7b850 *man/generateRandomSp.Rd +f8b670bd13cac4cbb89acbf2fb517a37 *man/generateSpFromFun.Rd +b21c6e1a2dd2349d60919c8e1d6d3965 *man/generateSpFromPCA.Rd +d9eaa7c53326951683240fc68b42d557 *man/limitDistribution.Rd +109675ed162dfa4828570b431e65998d *man/linearFun.Rd +6c1051fd5184583d971c01d60633880a *man/logisticFun.Rd +359d1d2afeed20aedd189e13b65e9317 *man/plotResponse.Rd +ff3a6c192389218d977fe644d882695a *man/quadraticFun.Rd +a5bb7891d82b905e9cac8f8a8db658bc *man/removeCollinearity.Rd +99aaa02b4045e8f8aad31f699714f7ee *man/sampleOccurrences.Rd +e5ef9bf821a74cd5bd785606f28f43cc *man/synchroniseNA.Rd +b5af6b7a7b58732da97b3bc05a4481cd *man/virtualspecies-package.Rd diff --git a/NAMESPACE b/NAMESPACE new file mode 100644 index 0000000..f1531db --- /dev/null +++ b/NAMESPACE @@ -0,0 +1,19 @@ +# Generated by roxygen2 (4.0.2): do not edit by hand + +S3method(plot,virtualspecies) +S3method(print,virtualspecies) +S3method(str,virtualspecies) +export(convertToPA) +export(formatFunctions) +export(generateRandomSp) +export(generateSpFromFun) +export(generateSpFromPCA) +export(limitDistribution) +export(linearFun) +export(logisticFun) +export(plotResponse) +export(quadraticFun) +export(removeCollinearity) +export(sampleOccurrences) +export(synchroniseNA) +import(raster) diff --git a/R/convertToPA.R b/R/convertToPA.R new file mode 100644 index 0000000..bd45cdd --- /dev/null +++ b/R/convertToPA.R @@ -0,0 +1,556 @@ +#' Convert a virtual species distribution (or a suitability raster) into presence-absence +#' +#' This functions converts the probabilities of presence from the output of +#' \code{\link{generateSpFromFun}}, \code{\link{generateSpFromPCA}}, \code{\link{generateRandomSp}} +#' or a suitability raster into +#' a presence-absence raster. The conversion can be threshold-based, or based +#' on a probability of conversion (see details). +#' +#' @param x a suitability raster, or the output from functions +#' \code{\link{generateSpFromFun}}, \code{\link{generateSpFromPCA}} +#' or \code{\link{generateRandomSp}} +#' @param PA.method \code{"threshold"} or \code{"probability"}. If +#' \code{"threshold"}, then occurrence probabilities are simply converted into +#' presence-absence according to the threshold \code{beta}. If \code{"probability"}, then +#' probabilities are converted according to a logistic function of threshold +#' \code{beta} and slope \code{alpha}. +#' @param beta \code{"random"}, a numeric value in the range of your +#' probabilities or \code{NULL}. This is the threshold of conversion into +#' presence-absence (= the inflexion point if \code{PA.method = "probability"}). +#' If \code{"random"}, a numeric value will be randomly generated within the range +#' of \code{x}. +#' @param alpha \code{NULL} or a negative numeric value. Only useful if +#' \code{PA.method = "probability"}. The value of \code{alpha} will +#' shape the logistic function transforming occurrences into presence-absences. +#' See \code{\link{logisticFun}} and examples therein for the choice of \code{alpha} +#' @param species.prevalence \code{NULL} or a numeric value between 0 and 1. +#' The species prevalence is the proportion of sites actually occupied by the +#' species. +#' @param plot \code{TRUE} or \code{FALSE}. If \code{TRUE}, maps of probabilities +#' of occurrence and presence-absence will be plotted. +#' +#' @export +#' @import raster +#' @author +#' Boris Leroy \email{leroy.boris@@gmail.com} +#' +#' with help from C. N. Meynard, C. Bellard & F. Courchamp +#' @references +#' Meynard C.N. & Kaplan D.M. 2013. Using virtual species to study species +#' distributions and model performance. +#' \emph{Journal of Biogeography} \bold{40}:1-8 +#' +#' Meynard C.N. & Kaplan D.M. 2011. The effect of a gradual response to the +#' environment on species distribution model performance. +#' \emph{Ecography} \bold{35}:499-509 +#' +#' @details +#' The conversion of probabilities of occurrence into presence - absence is +#' usually performed by selecting a threshold above which presence always occurs, +#' and never below. However, this approach may be too much unrealistic because +#' species may sometime be present in areas with a low probability of occurrence, +#' or be absent from areas with a high probability of occurrence. In addition, +#' when using a threshold you erase the previously generated response shapes: +#' it all becomes threshold. Thus, this threshold approach should be avoided. +#' +#' +#' Hence, a more +#' realistic conversion consists in converting probabilities into presence - +#' absence with a probability function (see references). Such a probability +#' conversion can be performed here with a logit function +#' (see \code{\link{logisticFun}}). +#' +#' To perform the probability conversion you have to define two of the +#' three following parameters: +#' \itemize{ +#' \item{\code{beta}: the 'threshold' of the logistic function (i.e. the +#' inflexion point)} +#' \item{\code{alpha}: the slope of the logistic function} +#' \item{\code{species.prevalence}: the proportion of sites in which the species +#' occur} +#' } +#' +#' If you provide \code{beta} and \code{alpha}, the \code{species.prevalence} +#' is calculated immediately calculated after conversion into presence-absence. +#' +#' On the other hand, if you provide \code{species.prevalence} and either +#' \code{beta} or \code{alpha}, the function will try to determine \code{alpha} +#' (if you provided \code{beta}) or \code{beta} (if you provided \code{alpha}). +#' +#' The relationship between species prevalence, alpha and beta is dependent +#' on the available range of environmental conditions (see Meynard and Kaplan, +#' 2011 and especially the Supporting Information). As a consequence, the +#' desired species prevalence may not be available for the defined \code{alpha} +#' or \code{beta}. In these conditions, the function will retain the \code{alpha} or +#' \code{beta} which provides the closest prevalence to your \code{species.prevalence}, +#' but you may also provide another value of \code{alpha} or \code{beta} to obtain +#' a closer prevalence. +#' +#' In all cases, the \code{species.prevalence} indicated in the output is the +#' prevalence measured on the output presence-absence map. +#' @note +#' The approximation of \code{alpha} or \code{beta} to the chosen +#' \code{species.prevalence} may take time if you work on very large rasters. +#' @return +#' a \code{list} containing 5 elements: +#' \itemize{ +#' \item{\code{approach}: the approach used to generate the species, \emph{i.e.}, \code{"response"}} +#' \item{\code{details}: the details and parameters used to generate the species} +#' \item{\code{suitab.raster}: the virtual species distribution, as a Raster object containing the +#' environmental suitability)} +#' \item{\code{PA.conversion}: the parameters used to convert the suitability into presence-absence} +#' \item{\code{pa.raster}: the presence-absence map, as a Raster object containing 0 (absence) / 1 (presence) / NA} +#' } +#' The structure of the virtualspecies object can be seen using str() +#' @examples +#' # Create an example stack with two environmental variables +#' a <- matrix(rep(dnorm(1:100, 50, sd = 25)), +#' nrow = 100, ncol = 100, byrow = TRUE) +#' env <- stack(raster(a * dnorm(1:100, 50, sd = 25)), +#' raster(a * 1:100)) +#' names(env) <- c("variable1", "variable2") +#' +#' # Creation of the parameter list +#' parameters <- formatFunctions(variable1 = c(fun = 'dnorm', mean = 0.00012, +#' sd = 0.0001), +#' variable2 = c(fun = 'linearFun', a = 1, b = 0)) +#' sp1 <- generateSpFromFun(env, parameters, plot = FALSE) +#' +#' # Conversion into presence-absence with a threshold-based approach +#' convertToPA(sp1, PA.method = "threshold", beta = 0.2, plot = TRUE) +#' convertToPA(sp1, PA.method = "threshold", beta = "random", plot = TRUE) +#' +#' # Conversion into presence-absence with a probability-based approach +#' convertToPA(sp1, PA.method = "probability", beta = 0.4, +#' alpha = -0.05, plot = TRUE) +#' convertToPA(sp1, PA.method = "probability", beta = "random", +#' alpha = -0.1, plot = TRUE) +#' +#' # Conversion into presence-absence by choosing the prevalence +#' # Threshold method +#' convertToPA(sp1, PA.method = "threshold", +#' species.prevalence = 0.3, plot = TRUE) +#' # Probability method, with alpha provided +#' convertToPA(sp1, PA.method = "probability", alpha = -0.1, +#' species.prevalence = 0.2, plot = TRUE) +#' # Probability method, with beta provided +#' convertToPA(sp1, PA.method = "probability", beta = 0.5, +#' species.prevalence = 0.2, alpha = NULL, +#' plot = TRUE) +#' +#' # Plot the output Presence-Absence raster only +#' sp1 <- convertToPA(sp1, plot = FALSE) +#' plot(sp1$pa.raster) + + +convertToPA <- function(x, + PA.method = "probability", + beta = "random", + alpha = -.05, + species.prevalence = NULL, + plot = TRUE) + +{ + if("virtualspecies" %in% class(x)) + { + if(class(x$suitab.raster) == "RasterLayer") + { + sp.raster <- x$suitab.raster + } else stop("x must be:\n- a raster layer object\nor\n- the output list from functions + generateSpFromFun(), generateSpFromPCA() or generateRandomSp()") + } else if (is.raster(x)) + { + sp.raster <- x + } else stop("x must be:\n- a raster layer object\nor\n- the output list from functions + generateSpFromFun(), generateSpFromPCA() or generateRandomSp()") + + if(any(is.na(maxValue(sp.raster)))) + { + sp.raster <- setMinMax(sp.raster) + } + + if(PA.method == "threshold") + { + if(is.numeric(beta)) + { + if(is.numeric(species.prevalence)) + { + warning("Both beta and species.prevalence were provided. beta will be + ignored.") + beta <- NULL + } else if(beta < sp.raster@data@min | beta > sp.raster@data@max) + { + stop("beta must be in the range of your suitability raster") + } + } else if(beta == "random") + { + if(is.numeric(species.prevalence)) + { + beta <- NULL + } else + { + beta <- sample(seq(sp.raster@data@min, + sp.raster@data@max, length = 1000), 1) + } + } else if(is.null(beta)) + { + if(is.null(species.prevalence)) + { + stop("Either provide beta or species.prevalence when choosing + PA.method = 'threshold'") + } + } else + { + stop("beta must either be 'random', a numeric value within the range of + your data or NULL") + } + } else if(PA.method == "probability") + { + if(length(c(alpha, beta, species.prevalence)) < 1) + { + if(!is.null(species.prevalence)) + { + warning("Neither alpha nor beta were provided. As a consequence, alpha + will be determined to a random value, and beta will be adjusted + automatically to the desired species prevalence.") + alpha <- -sample(c(seq((sp.raster@data@max - sp.raster@data@min)/1000, + (sp.raster@data@max - sp.raster@data@min)/100, length = 10), + seq((sp.raster@data@max - sp.raster@data@min)/100, + (sp.raster@data@max - sp.raster@data@min)/10, length = 100), + seq((sp.raster@data@max - sp.raster@data@min)/10, + (sp.raster@data@max - sp.raster@data@min)*10, length = 10)), size = 1) + } else + { + stop("If you choose PA.method = 'probability', you must provide two of the + three following parameters: beta, alpha and species.prevalence.") + } + } else if(length(c(alpha, beta, species.prevalence)) > 2) + { + if(beta != "random") + { + warning("You should not provide the three parameters beta, alpha and + species.prevalence. beta will be ignored and automatically defined by the + function") + } + beta <- NULL + } + # Checking the arguments. At this stage only two of them should be not NULL + if(!is.null(beta)) + { + if(is.numeric(beta)) + { + if(beta < sp.raster@data@min | beta > sp.raster@data@max) + { + stop("beta must be in the range of your suitability raster") + } + } else if(beta == "random") + { + beta <- sample(seq(sp.raster@data@min, + sp.raster@data@max, length = 1000), 1) + } else + { + stop("beta must either be 'random', a numeric value within the range of + your data or NULL") + } + } + + if(!is.null(species.prevalence)) + { + if(is.numeric(species.prevalence)) + { + if(!(species.prevalence >= 0 & + species.prevalence <= 1)) + { + stop("species.prevalence must be a numeric value between 0 and 1.") + } + } else + { + stop("species.prevalence must either be a numeric value between 0 and 1 + or NULL") + } + } + + if(!is.null(alpha)) + { + if(!is.numeric(alpha)) + { + stop("Please provide a numeric value to alpha") + } else if(alpha > 0) + { + warning("alpha was provided > 0. + This means that low probabilities will be converted to presences, + and high probabilities will be converted to absences. + If this is not what was intended, provide a negative alpha.") + } + } + + if(!is.null(species.prevalence)) + { + if(!is.null(beta)) + { + message(" --- Determing alpha automatically according to beta and species.prevalence\n\n") + } else + { + message(" --- Determing beta automatically according to alpha and species.prevalence\n\n") + } + } else + { + message(" --- Determing species.prevalence automatically according to alpha and beta\n\n") + } + } + + if (PA.method == "probability") + { + if(!is.null(species.prevalence)) + { + if(!is.null(beta)) + { + alpha.test <- NULL + for (alpha in c((sp.raster@data@max - sp.raster@data@min)/1000, (sp.raster@data@max - sp.raster@data@min) * 10)) + { + if(alpha > 0) alpha <- -alpha + PA.raster <- calc(sp.raster, fun = function(x) + { + logisticFun(x, beta = beta, alpha = alpha) + }) + + PA.raster <- calc(PA.raster, fun = function(x) + { + sapply(x, FUN = function(y) + { + if(is.na(y)) + { NA } else + { + sample(x = c(0, 1), size = 1, prob = c(1 - y, y)) + } + } + ) + } + ) + freqs <- freq(PA.raster) + if(any(is.na(freqs[, 1]))) + { + freqs <- freqs[-which(is.na(freqs[, 1])), ] + } + alpha.test <- rbind(alpha.test, c(alpha, + ifelse(nrow(freqs) == 2, + freqs[freqs[, "value"] == 1, "count"] / sum(freqs[, "count"]), + ifelse(freqs[, "value"] == 0, + 0, 1)))) + } + epsilon <- species.prevalence - alpha.test[, 2] + if(all(epsilon > 0)) + { + warning(paste("Warning, the desired species prevalence cannot be obtained, because of the chosen beta and available environmental conditions (see details). + The closest possible estimate of prevalence was", round(alpha.test[2, 2], 2), + "\nPerhaps you can try a lower beta value.")) + alpha <- alpha.test[2, 1] + } else if (all(epsilon < 0)) + { + warning(paste("Warning, the desired species prevalence cannot be obtained, because of the chosen beta and available environmental conditions (see details). + The closest possible estimate of prevalence was", round(alpha.test[1, 2], 2), + "\nPerhaps you can try a higher beta value.")) + alpha <- alpha.test[1, 1] + } else + { + while (all(abs(epsilon) > 0.01)) + { + alpha <- (alpha.test[which(epsilon == max(epsilon[epsilon < 0])), 1] + + alpha.test[which(epsilon == min(epsilon[epsilon > 0])), 1]) / 2 + PA.raster <- calc(sp.raster, fun = function(x) + { + logisticFun(x, beta = beta, alpha = alpha) + }) + + PA.raster <- calc(PA.raster, fun = function(x) + { + sapply(x, FUN = function(y) + { + if(is.na(y)) + { NA } else + { + sample(x = c(0, 1), size = 1, prob = c(1 - y, y)) + } + } + ) + } + ) + freqs <- freq(PA.raster) + if(any(is.na(freqs[, 1]))) + { + freqs <- freqs[-which(is.na(freqs[, 1])), ] + } + alpha.test <- rbind(alpha.test, c(alpha, + ifelse(nrow(freqs) == 2, + freqs[freqs[, "value"] == 1, "count"] / sum(freqs[, "count"]), + ifelse(freqs[, "value"] == 0, + 0, 1)))) + + epsilon <- species.prevalence - alpha.test[, 2] + } + } + } else + { + beta.test <- NULL + for (beta in c(sp.raster@data@min, sp.raster@data@max)) + { + PA.raster <- calc(sp.raster, fun = function(x) + { + logisticFun(x, beta = beta, alpha = alpha) + }) + + PA.raster <- calc(PA.raster, fun = function(x) + { + sapply(x, FUN = function(y) + { + if(is.na(y)) + { NA } else + { + sample(x = c(0, 1), size = 1, prob = c(1 - y, y)) + } + } + ) + } + ) + + freqs <- freq(PA.raster) + if(any(is.na(freqs[, 1]))) + { + freqs <- freqs[-which(is.na(freqs[, 1])), ] + } + beta.test <- rbind(beta.test, c(beta, + ifelse(nrow(freqs) == 2, + freqs[freqs[, "value"] == 1, "count"] / sum(freqs[, "count"]), + ifelse(freqs[, "value"] == 0, + 0, 1)))) + } + epsilon <- species.prevalence - beta.test[, 2] + if(all(epsilon > 0)) + { + warning(paste("Warning, the desired species prevalence cannot be obtained, because of the chosen alpha and available environmental conditions (see details). + The closest possible estimate of prevalence was", round(beta.test[1, 2], 2), + "\nPerhaps you can try an alpha value closer to 0.")) + beta <- beta.test[1, 1] + } else if (all(epsilon < 0)) + { + warning(paste("Warning, the desired species prevalence cannot be obtained, because of the chosen beta and available environmental conditions (see details). + The closest possible estimate of prevalence was", round(beta.test[1, 2], 2), + "\nPerhaps you can try an alpha value closer to 0.")) + beta <- beta.test[2, 1] + } else + { + while (all(abs(epsilon) > 0.01)) + { + beta <- (beta.test[which(epsilon == max(epsilon[epsilon < 0])), 1] + + beta.test[which(epsilon == min(epsilon[epsilon > 0])), 1]) / 2 + PA.raster <- calc(sp.raster, fun = function(x) + { + logisticFun(x, beta = beta, alpha = alpha) + }) + + PA.raster <- calc(PA.raster, fun = function(x) + { + sapply(x, FUN = function(y) + { + if(is.na(y)) + { NA } else + { + sample(x = c(0, 1), size = 1, prob = c(1 - y, y)) + } + } + ) + } + ) + freqs <- freq(PA.raster) + if(any(is.na(freqs[, 1]))) + { + freqs <- freqs[-which(is.na(freqs[, 1])), ] + } + beta.test <- rbind(beta.test, c(beta, + ifelse(nrow(freqs) == 2, + freqs[freqs[, "value"] == 1, "count"] / sum(freqs[, "count"]), + ifelse(freqs[, "value"] == 0, + 0, 1)))) + epsilon <- species.prevalence - beta.test[, 2] + } + } + } + } + + PA.raster <- calc(sp.raster, fun = function(x) + { + logisticFun(x, beta = beta, alpha = alpha) + }) + PA.raster <- calc(PA.raster, fun = function(x) + { + sapply(x, FUN = function(y) + { + if(is.na(y)) + { NA } else + { + sample(x = c(0, 1), size = 1, prob = c(1 - y, y)) + } + } + ) + } + ) + + } else if (PA.method == "threshold") + { + if(!is.null(species.prevalence)) + { + beta <- quantile(sp.raster, 1 - species.prevalence) + names(beta) <- NULL + } + message(" - Threshold of conversion into PA:", round(beta, 3), "; method = ", PA.method, "\n") + + PA.raster <- reclassify(sp.raster, + c(-Inf, beta, 0, + beta, +Inf, 1)) + } else {stop("Wrong PA.method entered (either 'probability' or 'threshold')")} + + species.prevalence <- round(freq(PA.raster)[2, 2] / sum(freq(PA.raster)[1:2, 2]), 3) + names(species.prevalence) <- NULL + + if("virtualspecies" %in% class(x)) + { + if(PA.method == "threshold") + { + x$PA.conversion = c(conversion.method = PA.method, + cutoff = beta, + species.prevalence = species.prevalence) + } else + { + x$PA.conversion = c(conversion.method = PA.method, + alpha = alpha, + beta = beta, + species.prevalence = species.prevalence) + } + x$pa.raster = PA.raster + results <- x + if(plot) plot(stack(results$suitab.raster, results$pa.raster), main = c("Suitability", "Presence-absence")) + } else if (is.raster(x)) + { + if(PA.method == "threshold") + { + PA.conversion = c(cutoff = beta, + conversion.method = PA.method, + species.prevalence = species.prevalence) + } else + { + PA.conversion = c(conversion.method = PA.method, + alpha = alpha, + beta = beta, + species.prevalence = species.prevalence) + } + results <- list(suitab.raster = x, + PA.conversion = PA.conversion, + pa.raster = PA.raster) + if(plot) plot(stack(results$suitab.raster, results$pa.raster), main = c("Suitability", "Presence-absence")) + } + return(results) +} + + + + diff --git a/R/formatFunctions.R b/R/formatFunctions.R new file mode 100644 index 0000000..db6851d --- /dev/null +++ b/R/formatFunctions.R @@ -0,0 +1,94 @@ +#' Format and visualise functions used to generate virtual species with \code{\link{generateSpFromFun}} +#' +#' @description +#' This function is a helper function to simplify the formatting of functions +#' for generateSpFromFun. +#' @details +#' This function formats the \code{parameters} argument of \code{\link{generateSpFromFun}}. +#' For each environmental variable, provide a vector containing the function name, and its arguments. +#' +#' +#' For example, assume we want to generate a species responding to two environmental variables bio1 and bio2. +#' \itemize{ +#' \item{The response to bio1 is a normal response (\code{\link{dnorm}}), of mean 1 and standard deviation 0.5.} +#' \item{The response to bio2 is a linear response (\code{\link{linearFun}}), of slope (a) 2 and intercept (b) 5.} +#' } +#' The correct writing is: +#' +#' \code{formatFunctions( +#' bio1 = c(fun = "dnorm", mean = 1, sd = 0.5), +#' bio2 = c(fun = "linearFun", a = 2, b = 5))} +#' @section Warning: +#' Do not use 'x' as a name for your environmental variables. +#' @param x NULL or a \code{RasterStack}. If you want to visualise the functions, +#' provide your \code{RasterStack} here. +#' @param rescale \code{TRUE} or \code{FALSE}. If \code{TRUE}, individual response +#' plots are rescaled between 0 and 1. +#' @param ... the parameters to be formatted. See details. +#' @export +#' @import raster +#' @author +#' Boris Leroy \email{leroy.boris@@gmail.com} +#' +#' with help from C. N. Meynard, C. Bellard & F. Courchamp +#' @examples +#' my.parameters <- formatFunctions(variable1 = c(fun = 'dnorm', +#' mean = 0.00012, sd = 0.0001), +#' variable2 = c(fun = 'linearFun', a = 1, b = 0)) +#' +#' +#' my.parameters <- formatFunctions(bio1 = c(fun = "logisticFun", +#' alpha = -12.7, beta = 68), +#' bio2 = c(fun = "linearFun", +#' a = -0.03, b = 191.2), +#' bio3 = c(fun = "dnorm", +#' mean = 86.4, sd = 19.1), +#' bio4 = c(fun = "logisticFun", +#' alpha = 2198.5, beta = 11381.4)) +#' \dontrun{ +#' # An example using worldclim data +#' bio1.4 <- getData('worldclim', var='bio', res=10)[[1:4]] +#' my.parameters <- formatFunctions(x = bio1.4, +#' bio1 = c(fun = "logisticFun", +#' alpha = -12.7, beta = 68), +#' bio2 = c(fun = "linearFun", +#' a = -0.03, b = 191.2), +#' bio3 = c(fun = "dnorm", +#' mean = 86.4, sd = 19.1), +#' bio4 = c(fun = "logisticFun", +#' alpha = 2198.5, beta = 11381.4)) +#' } + +formatFunctions <- function(x = NULL, rescale = TRUE, ...) +{ + details <- list() + args <- list(...) + for (i in names(args)) + { + if(!("fun" %in% names(args[[i]]))) + { + stop(paste("No function was correctly provided for variable", i)) + } + details[[i]]$fun <- args[[i]]["fun"] + args[[i]] <- args[[i]][-(names(args[[i]]) %in% "fun")] + details[[i]]$args <- as.list(args[[i]]) + a <- sapply(args[[i]], as.numeric) + if(any(is.na(a))) + { + details[[i]]$args[[which(!is.na(a))]] <- sapply(details[[i]]$args[[which(!is.na(a))]], as.numeric) + } else + { + details[[i]]$args <- a + } + } + if (is(x, "Raster")) + { + + plotResponse(x = x, parameters = details, rescale = rescale, approach = "response") + } else if (!is.null(x)) + { + stop("x must either be NULL or a raster stack of environmental variables") + } + return(details) +} + diff --git a/R/generateRandomSp.R b/R/generateRandomSp.R new file mode 100644 index 0000000..4170083 --- /dev/null +++ b/R/generateRandomSp.R @@ -0,0 +1,287 @@ +#' Generate a random virtual species distribution from environmental variables +#' +#' @description +#' This function generates randomly a virtual species distribution. +#' +#' @param raster.stack a RasterStack object, in which each layer represent an environmental +#' variable. +#' @param approach \code{"automatic"}, \code{"random"}, \code{"response"} +#' or \code{"pca"}. This parameters defines how species will be generated. +#' \code{"automatic"}: If less than 6 variables in \code{raster.stack}, a +#' response approach will be used, otherwise a pca approach will be used. +#' \code{"random"}: the approach will be randomly picked. Otherwise choose +#' \code{"response"} or \code{"pca"}. See details. +#' @param rescale \code{TRUE} or \code{FALSE}. If \code{TRUE}, the final +#' probability of presence is rescaled between 0 and 1. +#' @param convert.to.PA \code{TRUE} or \code{FALSE}. If \code{TRUE}, the +#' virtual species distribution will also be converted into Presence-Absence. +#' @param relations [response approach] a vector containing the possible types of response function. +#' The implemented type of relations are \code{"gaussian"}, \code{"linear"}, +#' \code{"logistic"} and \code{"quadratic"}. +#' @param rescale.each.response \code{TRUE} or \code{FALSE}. If \code{TRUE}, the individual responses to +#' each environmental variable are rescaled between 0 and 1 +#' @param realistic.sp [response approach] \code{TRUE} or \code{FALSE}. If +#' \code{TRUE}, the function will try to define responses that can form a viable +#' species. If \code{FALSE}, the responses will be randomly generated +#' (may result in environmental conditions that do not co-exist). +#' @param species.type [response approach] \code{"additive"}, \code{"multiplicative"} or \code{"mixed"}. Defines +#' how the final probability of presence is calculated: if \code{"additive"}, responses to each +#' variable are summed; if \code{"multiplicative"}, responses are multiplicated; if \code{"mixed"} +#' the final probability will be a random combination of products and sums of individual responses. +#' See \code{\link{generateSpFromFun}} +#' @param niche.breadth [pca approach] \code{"any"}, \code{"narrow"} or \code{"wide"}. This parameter +#' defines how tolerant is the species regarding environmental conditions by adjusting +#' the standard deviations of the gaussian functions. See \code{\link{generateSpFromPCA}} +#' @param sample.points [pca approach] \code{TRUE} of \code{FALSE}. If you have a large +#' raster file then use this parameter to sample a number of points equal to +#' \code{nb.points}. +#' @param nb.points [pca approach] a numeric value. Only useful if \code{sample.points = TRUE}. +#' The number of sampled points from the raster, to perform the PCA. A too small +#' value may not be representative of the environmental conditions in your raster. +#' @param PA.method \code{"threshold"} or \code{"probability"}. If +#' \code{"threshold"}, then occurrence probabilities are simply converted into +#' presence-absence according to the threshold \code{beta}. If \code{"probability"}, then +#' probabilities are converted according to a logistic function of threshold +#' \code{beta} and slope \code{alpha}. +#' @param beta \code{"random"}, a numeric value in the range of your +#' probabilities or \code{NULL}. This is the threshold of conversion into +#' presence-absence (= the inflexion point if \code{PA.method = "probability"}). +#' If \code{"random"}, a numeric value will be randomly generated within the range +#' of probabilities of occurrence. See \code{\link{convertToPA}} +#' @param alpha \code{NULL} or a negative numeric value. Only useful if +#' \code{PA.method = "probability"}. The value of \code{alpha} will +#' shape the logistic function transforming occurrences into presence-absences. +#' See \code{\link{logisticFun}} and examples therein for the choice of \code{alpha} +#' @param species.prevalence \code{NULL} or a numeric value between 0 and 1. +#' The species prevalence is the proportion of sites actually occupied by the +#' species. See \code{\link{convertToPA}} +#' @param plot \code{TRUE} or \code{FALSE}. If \code{TRUE}, the generated virtual species will be plotted. +#' @details +#' By default, the function will perform a probabilistic conversion into presence- +#' absence, with a randomly chosen beta threshold. If you want to custmose the +#' conversion parameters, you have to define \bold{two} of the three following parameters: +#' \itemize{ +#' \item{\code{beta}: the 'threshold' of the logistic function (i.e. the +#' inflexion point)} +#' \item{\code{alpha}: the slope of the logistic function} +#' \item{\code{species.prevalence}: the proportion of sites in which the species +#' occur} +#' } +#' +#' If you provide \code{beta} and \code{alpha}, the \code{species.prevalence} +#' is calculated immediately calculated after conversion into presence-absence. +#' +#' As explained in \code{\link{convertToPA}}, if you choose choose a precise +#' \code{species.prevalence}, it may not be possible to reach this particular +#' value because of the availability of environmental conditions. Several +#' runs may be necessary to reach the desired \code{species.prevalence}. +#' @export +#' @import raster +#' @author +#' Boris Leroy \email{leroy.boris@@gmail.com} +#' +#' with help from C. N. Meynard, C. Bellard & F. Courchamp +#' @return a \code{list} with 3 to 5 elements (depending if the conversion to presence-absence was performed): +#' \itemize{ +#' \item{\code{approach}: the approach used to generate the species, \emph{i.e.}, \code{"response"}} +#' \item{\code{details}: the details and parameters used to generate the species} +#' \item{\code{suitab.raster}: the virtual species distribution, as a Raster object containing the +#' environmental suitability)} +#' \item{\code{PA.conversion}: the parameters used to convert the suitability into presence-absence} +#' \item{\code{pa.raster}: the presence-absence map, as a Raster object containing 0 (absence) / 1 (presence) / NA} +#' } +#' The structure of the virtualspecies can object be seen using str() +#' @examples +#' # Create an example stack with six environmental variables +#' a <- matrix(rep(dnorm(1:100, 50, sd = 25)), +#' nrow = 100, ncol = 100, byrow = TRUE) +#' env <- stack(raster(a * dnorm(1:100, 50, sd = 25)), +#' raster(a * 1:100), +#' raster(a * logisticFun(1:100, alpha = 10, beta = 70)), +#' raster(t(a)), +#' raster(exp(a)), +#' raster(log(a))) +#' names(env) <- paste("Var", 1:6, sep = "") +#' +#' # More than 6 variables: by default a PCA approach will be used +#' generateRandomSp(env) +#' +#' # Manually choosing a response approach +#' generateRandomSp(env, approach = "response") +#' +#' # Randomly choosing the approach +#' generateRandomSp(env, approach = "random") +#' +#' + +generateRandomSp <- function(raster.stack, + approach = "automatic", + rescale = TRUE, + convert.to.PA = TRUE, + relations = c("gaussian", "linear", "logistic", "quadratic"), + rescale.each.response = TRUE, + realistic.sp = TRUE, + species.type = "multiplicative", + niche.breadth = "any", + sample.points = FALSE, + nb.points = 10000, + PA.method = "probability", + alpha = -.1, + beta = "random", + species.prevalence = NULL, + plot = TRUE) +{ + if(!(is(raster.stack, "Raster"))) + { + stop("raster.stack must be a raster stack object") + } + if(any(is.na(maxValue(raster.stack)))) + { + raster.stack <- setMinMax(raster.stack) + } + + if(approach == "automatic") + { + if(nlayers(raster.stack) <= 5) + { + approach <- "response" + } else + { + approach <- "pca" + } + } else if (approach == "random") + { + approach <- sample(c("response", "pca"), 1) + } else if(!(approach %in% c("response", "pca"))) + { + stop("Argument approach was misspecified. Either choose 'automatic', 'random', 'response' or 'pca'.") + } + + var.names <- names(raster.stack) + + if(approach == "pca") + { + results <- generateSpFromPCA(raster.stack, + niche.breadth = niche.breadth, + sample.points = sample.points, + nb.points = nb.points, + plot = FALSE) + } else if (approach == "response") + { + parameters <- list() + message(" - Determining species' response to predictor variables\n") + if(any(!(relations %in% c("gaussian", "linear", "logistic", "quadratic")))) + { + stop(paste("Wrong relation type specified: pick among '", + paste(c("gaussian", "linear", "logistic", "quadratic"), collapse = " "), "'", + collapse = " ")) + } + valid.cells <- setValues(raster.stack[[1]], 1) + var.order <- sample(var.names, length(var.names), replace = F) + for (i in 1:length(var.order)) + { + + cur.var <- var.order[i] + cur.rast <- raster.stack[[cur.var]] + if(realistic.sp) cur.rast <- cur.rast * valid.cells # Cur.rast is here restricted to current suitable conds + + type <- sample(relations, 1) + + if (type == "gaussian") + { + parameters[[cur.var]] <- list(fun = 'dnorm', + args = list(mean = sample(seq(cur.rast@data@min, + cur.rast@data@max, + length = 100000), 1), + sd = sample(seq(0, + (raster.stack[[cur.var]]@data@max - raster.stack[[cur.var]]@data@min), + length = 100000), 1)) + ) + } else if (type == "linear") + { # At the moment this is not really useful because the rescale will transforme the results in either 0:1 or 1:0, regardless of the slope + # To be improved later + parameters[[cur.var]] <- list(fun = 'linearFun', + args = list(a = sample(seq(-1, 1, length = 100), 1), + b = sample(seq(raster.stack[[cur.var]]@data@min, + raster.stack[[cur.var]]@data@max, + length = 100000), 1)) + ) + } else if (type == "logistic") + { + beta.t <- sample(seq(raster.stack[[cur.var]]@data@min, + raster.stack[[cur.var]]@data@max, + length = 1000000), 1) + alpha.t <- sample(c(seq((raster.stack[[cur.var]]@data@max - raster.stack[[cur.var]]@data@min)/1000, + (raster.stack[[cur.var]]@data@max - raster.stack[[cur.var]]@data@min)/100, length = 10), + seq((raster.stack[[cur.var]]@data@max - raster.stack[[cur.var]]@data@min)/100, + (raster.stack[[cur.var]]@data@max - raster.stack[[cur.var]]@data@min)/10, length = 100), + seq((raster.stack[[cur.var]]@data@max - raster.stack[[cur.var]]@data@min)/10, + (raster.stack[[cur.var]]@data@max - raster.stack[[cur.var]]@data@min)*10, length = 10)), size = 1) + if(realistic.sp == TRUE) + { + if(beta.t > cur.rast@data@max) + { + alpha.t <- alpha.t + } else if (beta.t < cur.rast@data@min) + { + alpha.t <- -alpha.t + } else + { + alpha.t <- sample(c(alpha.t, -alpha.t), 1) + } + } + + parameters[[cur.var]] <- list(fun = 'logisticFun', + args = list(alpha = alpha.t, + beta = beta.t) + ) + } else if (type == "quadratic") + { + max.point <- sample(seq(cur.rast@data@min, + cur.rast@data@max, + length = 1000), 1) + a <- sample(seq(-.01, -20, length = 10000), 1) + b <- - max.point * 2 * a + parameters[[cur.var]] <- list(fun = 'quadraticFun', + args = list(a = a, + b = b, + c = 0) + ) + + } + + # Restricting values to suitable conditions + tmp.rast <- calc(raster.stack[[cur.var]], fun = function(x) + { + do.call(match.fun(parameters[[cur.var]]$fun), args = c(list(x), parameters[[cur.var]]$args)) + } + ) + tmp.rast <- (tmp.rast - tmp.rast@data@min) / (tmp.rast@data@max - tmp.rast@data@min) + valid.cells <- valid.cells * (tmp.rast > 0.05) + } + message(" - Calculating species suitability\n") + results <- generateSpFromFun(raster.stack, parameters, species.type = species.type, plot = FALSE) + } + + + + + + if(convert.to.PA == TRUE) + { + message(" - Converting into Presence - Absence\n") + results <- convertToPA(results, + PA.method = PA.method, + alpha = alpha, + beta = beta, + species.prevalence = species.prevalence, + plot = FALSE) + + if(plot) plot(stack(results$suitab.raster, results$pa.raster), main = c("Suitability", "Presence-absence")) + } else + { + if(plot) plot(results$suitab.raster, main = "Suitability") + } + + return(results) +} diff --git a/R/generateSpFromFun.R b/R/generateSpFromFun.R new file mode 100644 index 0000000..c6d0114 --- /dev/null +++ b/R/generateSpFromFun.R @@ -0,0 +1,268 @@ +#' Generate a virtual species distributions with responses to environmental variables +#' +#' This function generates a virtual species distribution from a RasterStack of environmental +#' variables and a defined set of responses to each environmental parameter. +#' +#' @param raster.stack a RasterStack object, in which each layer represent an environmental +#' variable. +#' @param parameters a list containing the functions of response of the species to +#' environmental variables with their parameters. See details. +#' @param rescale \code{TRUE} or \code{FALSE}. If \code{TRUE}, the final probability of presence is rescaled between 0 and 1. +#' @param species.type \code{"additive"}, \code{"multiplicative"} or \code{"mixed"}. Defines +#' how the final probability of presence is calculated: if \code{"additive"}, responses to each +#' variable are summed; if \code{"multiplicative"}, responses are multiplicated; if \code{"mixed"} +#' responses are both summed and multiplicated depending on argument \code{formula} +#' @param formula \code{NULL} to create a random formula to calculate the final probability +#' of presence, or a character string of the form: \code{"layername1 + layername2 * +#' layername3 * etc."} to manually define it. Only used if \code{species.type} is set to +#' \code{"mixed"} +#' @param rescale.each.response \code{TRUE} or \code{FALSE}. If \code{TRUE}, the individual responses to +#' each environmental variable are rescaled between 0 and 1 (see details). +#' @param plot \code{TRUE} or \code{FALSE}. If \code{TRUE}, the generated virtual species will be plotted. +#' @return a \code{list} with 3 elements: +#' \itemize{ +#' \item{\code{approach}: the approach used to generate the species, \emph{i.e.}, \code{"response"}} +#' \item{\code{details}: the details and parameters used to generate the species} +#' \item{\code{suitab.raster}: the virtual species distribution, as a Raster object containing the +#' environmental suitability)} +#' } +#' #' The structure of the virtualspecies object can be seen using str() +#' @seealso \code{\link{generateSpFromPCA}} to generate a virtual species with a PCA approach +#' @details +#' This functions proceeds into several steps: +#' \enumerate{ +#' \item{The response to each environmental variable is calculated with the functions provided +#' in \code{parameters}. This results in a probability of presence for each variable.} +#' \item{If \code{rescale.each.response} is \code{TRUE}, each probability of presence is rescaled between 0 and 1.} +#' \item{The final probability of presence is calculated according to the chosen \code{species.type}.} +#' \item{If \code{rescale} is \code{TRUE}, the final probability of presence is rescaled between 0 and 1.} +#' } +#' The RasterStack containing environmental variables must have consistent names, +#' because they will be checked with the \code{parameters}. For example, they can be named +#' var1, var2, etc. Names can be checked and set with \code{names(my.stack)}. +#' +#' The \code{parameters} have to be carefully created, otherwise the function will not work: +#' \itemize{ +#' \item{Either see \code{\link{formatFunctions}} to easily create your list of parameters} +#' \item{Or create a \code{list} defined according to the following template: \cr +#' \code{list( +#' var1 = list(fun = 'fun1', args = list(arg1 = ..., arg2 = ..., etc.)), +#' var2 = list(fun = 'fun2', args = list(arg1 = ..., arg2 = ..., etc.)))}\cr +#' It is important to keep the same names in the parameters as in the stack of environmental +#' variables. Similarly, argument names must be identical to argument names in the associated +#' function (e.g., if you use \code{fun = 'dnorm'}, then args should look like \code{list(mean = 0, sd = 1)}). +#' +#' See the example section below for more examples.}} +#' +#' +#' Any response function that can be applied to the environmental variables can +#' be chosen here. Several functions are proposed in this package: +#' \code{\link{linearFun}}, \code{\link{logisticFun}} and \code{\link{quadraticFun}}. +#' Another classical example is the normal distribution: \code{\link[stats]{dnorm}}. +#' Ther users can also create and use their own functions. +#' +#' +#' If \code{rescale.each.response = TRUE}, then the probability response to each +#' variable will be normalised between 0 and 1 according to the following formula: +#' P.rescaled = (P - min(P)) / (max(P) - min (P)) +#' This rescaling has a strong impact on response functions, so users may prefer to +#' use \code{rescale.each.response = FALSE} and apply their own rescaling within +#' their response functions. +#' +#' +#' @export +#' @import raster +#' @author +#' Boris Leroy \email{leroy.boris@@gmail.com} +#' +#' with help from C. N. Meynard, C. Bellard & F. Courchamp +#' @examples +#' # Create an example stack with two environmental variables +#' a <- matrix(rep(dnorm(1:100, 50, sd = 25)), +#' nrow = 100, ncol = 100, byrow = TRUE) +#' env <- stack(raster(a * dnorm(1:100, 50, sd = 25)), +#' raster(a * 1:100)) +#' names(env) <- c("variable1", "variable2") +#' plot(env) # Illustration of the variables +#' +#' # Easy creation of the parameter list: +#' # see in real time the shape of the response functions +#' parameters <- formatFunctions(variable1 = c(fun = 'dnorm', mean = 1e-04, +#' sd = 1e-04), +#' variable2 = c(fun = 'linearFun', a = 1, b = 0)) +#' +#' # If you provide env, then you can see the shape of response functions: +#' parameters <- formatFunctions(x = env, +#' variable1 = c(fun = 'dnorm', mean = 1e-04, +#' sd = 1e-04), +#' variable2 = c(fun = 'linearFun', a = 1, b = 0)) +#' +#' # Generation of the virtual species +#' sp1 <- generateSpFromFun(env, parameters) +#' sp1 +#' par(mfrow = c(1, 1)) +#' plot(sp1) +#' +#' +#' # Manual creation of the parameter list +#' # Note that the variable names are the same as above +#' parameters <- list(variable1 = list(fun = 'dnorm', +#' args = list(mean = 0.00012, +#' sd = 0.0001)), +#' variable2 = list(fun = 'linearFun', +#' args = list(a = 1, b = 0))) +#' # Generation of the virtual species +#' sp1 <- generateSpFromFun(env, parameters, plot = TRUE) +#' sp1 +#' plot(sp1) + + +generateSpFromFun <- function(raster.stack, parameters, + rescale = TRUE, + species.type = "multiplicative", formula = NULL, rescale.each.response = TRUE, + plot = FALSE) +{ + approach <- "response" + if(!(is(raster.stack, "Raster"))) + { + stop("raster.stack must be a raster stack object") + } + if(any(is.na(maxValue(raster.stack)))) + { + raster.stack <- setMinMax(raster.stack) + } + n.l <- nlayers(raster.stack) + if(n.l != length(parameters)) + {stop("Provide as many layers in raster.stack as functions on parameters")} + if(any(!(names(parameters) %in% names(raster.stack)) | + !(names(raster.stack) %in% names(parameters)))) + {stop("Layer names and names of parameters must be identical")} + # Checking the structure and consistency of parameters + for (i in 1:length(parameters)) + { + if(any(!(c("fun", "args") %in% names(parameters[[i]])))) + {stop("The structure of parameters does not seem correct. + Please provide function and arguments for variable '", + names(parameters)[i], "'. See help(generateSpFromFun) for more details.", + sep = "")} + test <- tryCatch(match.fun(parameters[[i]]$fun), error = function(c) "error") + if(class(test) != "function") + { + stop(paste("The function ", parameters[[i]]$fun, " does not exist, please verify spelling.", sep = "")) + } + if(any(!(names(parameters[[i]]$args) %in% names(formals(fun = test))))) + { + stop(paste("Arguments of variable '", names(parameters)[i], "' (", + paste(names(parameters[[i]]$args), collapse = ", "), + ") do not match arguments of the associated function\n + List of possible arguments for this function: ", + paste(names(formals(fun = test)), collapse = ", "), sep = "")) + } + rm(test) + } + suitab.raster <- stack(sapply(names(raster.stack), FUN = function(y) + { + calc(raster.stack[[y]], fun = function(x) + { + do.call(match.fun(parameters[[y]]$fun), args = c(list(x), parameters[[y]]$args)) + } + ) + })) + + for (var in names(raster.stack)) + { + parameters[[var]]$min <- raster.stack[[var]]@data@min + parameters[[var]]$max <- raster.stack[[var]]@data@max + } + + if(rescale.each.response) + { + suitab.raster <- stack(sapply(names(suitab.raster), function(y) + { + (suitab.raster[[y]] - suitab.raster[[y]]@data@min) / (suitab.raster[[y]]@data@max - suitab.raster[[y]]@data@min) + })) + } + + if(species.type == "multiplicative") + { + suitab.raster <- raster::overlay(suitab.raster, fun = prod) + } else if (species.type == "additive") + { + suitab.raster <- raster::overlay(suitab.raster, fun = sum) + } else if (species.type == "mixed") + { + layer.names <- sample(names(suitab.raster)) + if(!is.null(formula)) + { + if (length(strsplit(formula, " ")[[1]]) != (nlayers(suitab.raster) * 2 - 1)) + {stop("The entered formula isn't correct. Check that the layer names are correct, and that operators are correctly spaced:\n + formula = 'layername1 + layername2 * layername3 * etc.'")} + else if (any(!(strsplit(formula, " ")[[1]][!(strsplit(formula, " ")[[1]] %in% c("+", "*", "/", "-"))] %in% names(raster.stack)))) + { + stop("The entered formula isn't correct. The layer names in the formula do not seem to correspond to layer names in raster.stack:\n + formula = 'layername1 + layername2 * layername3 * etc.'") + } + } else + { + formula <- layer.names[1] + for (i in layer.names[2:nlayers(suitab.raster)]) + { + formula <- paste(formula, sample(c("*", "+"), 1), i) + } + } + + operators <- strsplit(formula, " ")[[1]] + operators <- operators[(1:length(operators)) %% 2 == 0] + id <- c(1:nlayers(suitab.raster), 1:(nlayers(suitab.raster) - 1) + 0.5) + + mixed.fun <- NULL + eval(parse(text = paste("mixed.fun <- function(", + paste("x", 1:nlayers(suitab.raster), sep = "", collapse = ", "), + ") {", + paste(c(paste("x", 1:nlayers(suitab.raster), sep = ""), operators)[order(id)], collapse = " "), + "}" + ))) + + suitab.raster <- overlay(suitab.raster, fun = mixed.fun) + print(formula) + } else + { + stop("species.type must be either 'multiplicative', 'additive' or 'mixed'") + } + if(rescale) + { + suitab.raster <- (suitab.raster - suitab.raster@data@min) / (suitab.raster@data@max - suitab.raster@data@min) + } + + if(species.type == "mixed") + { + results <- list(approach = approach, + + details = list(variables = names(parameters), + sp.type = c(sp.type = species.type, + formula = formula), + rescale.each.response = rescale.each.response, + rescale = rescale, + parameters = parameters), + suitab.raster = suitab.raster + ) + } else + { + results <- list(approach = approach, + details = list(variables = names(parameters), + sp.type = species.type, + rescale.each.response = rescale.each.response, + rescale = rescale, + parameters = parameters), + suitab.raster = suitab.raster + ) + } + if(plot) + { + plot(results$suitab.raster, main = "Environmental suitability of the virtual species") + } + + class(results) <- append(class(results), "virtualspecies") + + return(results) +} diff --git a/R/generateSpFromPCA.R b/R/generateSpFromPCA.R new file mode 100644 index 0000000..d93f2ae --- /dev/null +++ b/R/generateSpFromPCA.R @@ -0,0 +1,299 @@ +#' Generate a virtual species distribution with a PCA of environmental variables +#' +#' This functions generates a virtual species distribution by computing a +#' PCA among environmental variables, and simulating the response of the species +#' along the two first axes of the PCA. The response to axes of the PCA is +#' determined with gaussian functions. +#' @param raster.stack a RasterStack object, in which each layer represent an environmental +#' variable. +#' @param rescale \code{TRUE} of \code{FALSE}. Should the output suitability raster be +#' rescaled between 0 and 1? +#' @param niche.breadth \code{"any"}, \code{"narrow"} or \code{"wide"}. This parameter +#' defines how tolerant is the species regarding environmental conditions by adjusting +#' the standard deviations of the gaussian functions. See details. +#' @param means a vector containing two numeric values. Will be used to define +#' the means of the gaussian response functions to the axes of the PCA. +#' @param sds a vector containing two numeric values. Will be used to define +#' the standard deviations of the gaussian response functions to the axes of +#' the PCA. +#' @param pca a \code{dudi.pca} object. You can provide a pca object that you +#' computed yourself with \code{\link[ade4]{dudi.pca}} +#' @param remove.collinearity \code{TRUE} of \code{FALSE}. Can be set to \code{TRUE} +#' to remove groups of collinear variables (at the cutoff \code{multicollinearity.cutoff}) +#' in your dataset: only one variable per group will be kept. See \code{\link{removeCollinearity}} +#' for details. +#' @param multicollinearity.cutoff a numeric value between 0 and 1. Only useful if +#' \code{remove.collinearity = TRUE}. The cutoff of collinearity above which +#' variables will be grouped together. +#' @param sample.points \code{TRUE} of \code{FALSE}. If you have a large +#' raster file then use this parameter to sample a number of points equal to +#' \code{nb.points}. +#' @param nb.points a numeric value. Only useful if \code{sample.points = TRUE}. +#' The number of sampled points from the raster, to perform the PCA. A too small +#' value may not be representative of the environmental conditions in your raster. +#' @param plot \code{TRUE} or \code{FALSE}. If \code{TRUE}, the generated virtual species will be plotted. +#' @note +#' To perform the PCA, the function has to transform the raster into a matrix. +#' This may not be feasible if the raster is too large for the computer's memory. +#' In this case, you should perform the PCA on a sample of your raster with +#' set \code{sample.points = TRUE} and choose the number of points to sample with +#' \code{nb.points}. +#' @details +#' This function proceeds in 3 steps: +#' \enumerate{ +#' \item{A PCA of environmental conditions is generated} +#' \item{Gaussian responses to the first two axes are computed} +#' \item{These responses are multiplied to obtain the final environmental suitability}} +#' +#' The shape of gaussian responses can be determined manually, by defining both +#' \code{means} and \code{sds}, or randomly. The random generation is constrained +#' by the argument \code{niche.breadth}, which controls the range of possible +#' standard deviation values. This range of values is based on +#' a fraction of the axis: +#' \itemize{ +#' \item{\code{"any"}: the standard deviations can have values from 1/100 to 1/2 +#' of the axes' span} +#' \item{\code{"narrow"}: the standard deviations can have values from 1/100 to 1/10 +#' of the axes' span} +#' \item{\code{"wide"}: the standard deviations can have values from 1/10 to 1/2 +#' of the axes' span} +#' } +#' @import raster +#' @export +#' @author +#' Boris Leroy \email{leroy.boris@@gmail.com} +#' +#' with help from C. N. Meynard, C. Bellard & F. Courchamp +#' @seealso \code{\link{generateSpFromFun}} to generate a virtual species with +#' the responses to each environmental variables. +#' #' @return a \code{list} with 3 elements: +#' \itemize{ +#' \item{\code{approach}: the approach used to generate the species, \emph{i.e.}, \code{"pca"}} +#' \item{\code{details}: the details and parameters used to generate the species} +#' \item{\code{suitab.raster}: the virtual species distribution, as a Raster object containing the +#' environmental suitability} +#' } +#' The structure of the virtualspecies object can be seen using str() +#' @examples +#' # Create an example stack with four environmental variables +#' a <- matrix(rep(dnorm(1:100, 50, sd = 25)), +#' nrow = 100, ncol = 100, byrow = TRUE) +#' env <- stack(raster(a * dnorm(1:100, 50, sd = 25)), +#' raster(a * 1:100), +#' raster(a * logisticFun(1:100, alpha = 10, beta = 70)), +#' raster(t(a))) +#' names(env) <- c("var1", "var2", "var3", "var4") +#' plot(env) # Illustration of the variables +#' +#' +#' +#' +#' +#' # Generating a species with the PCA +#' +#' generateSpFromPCA(raster.stack = env) +#' +#' # The top part of the plot shows the PCA and the response functions along +#' # the two axes. +#' # The bottom part shows the probabilities of occurrence of the virtual +#' # species. +#' +#' +#' +#' +#' +#' # Defining manually the response to axes +#' +#' generateSpFromPCA(raster.stack = env, +#' means = c(-2, 0), +#' sds = c(0.6, 1.5)) +#' +#' # This species can be seen as occupying intermediate altitude ranges of a +#' # conic mountain. +#' + + +generateSpFromPCA <- function(raster.stack, rescale = TRUE, niche.breadth = "any", + means = NULL, sds = NULL, pca = NULL, + remove.collinearity = FALSE, multicollinearity.cutoff = 0.7, + sample.points = FALSE, nb.points = 10000, + plot = TRUE) +{ + if(!(is(raster.stack, "Raster"))) + { + stop("raster.stack must be a raster stack object") + } + if(sample.points) + { + if(!is.numeric(nb.points)) + {stop("nb.points must be a numeric value corresponding to the number of pixels to sample from raster.stack")} + env.df <- sampleRandom(raster.stack, size = nb.points, na.rm = TRUE) + } else + { + if(canProcessInMemory(raster.stack, n = 2)) + { + env.df <- getValues(raster.stack) + if(any(is.na(env.df))) + { + env.df <- env.df[-unique(which(is.na(env.df), arr.ind = T)[, 1]), ] # Removing NAs + } + } else + { + stop("Your computer does not have enough memory to extract all the values from raster.stack. + Use the argument sample.points = TRUE, and adjust the number of points to use with nb.points. More details in ?generateSpFromPCA") + } + } + + if(!is.null(pca)) + { + if(!all(class(pca) %in% c("pca", "dudi"))) + {stop("Please provide an appropriate pca.object (output of dudi.pca()) to make the pca plot.")} + if(any(!(names(pca$tab) %in% names(raster.stack)))) + {stop("The variables used to make the pca must be the same as variables names in raster.stack")} + if(remove.collinearity) + {message(" - Collinearity will not be removed because you have already provided a PCA object.\n")} + + pca.object <- pca + rm(pca) + sel.vars <- names(raster.stack) + } else + { + if(remove.collinearity) + { + sel.vars <- removeCollinearity(raster.stack, select.variables = TRUE, + multicollinearity.cutoff = multicollinearity.cutoff, + sample.points = sample.points, + nb.points = nb.points) + } else + { + sel.vars <- names(raster.stack) + } + raster.stack <- raster.stack[[sel.vars]] + + if(sample.points) + { + if(!is.numeric(nb.points)) + {stop("nb.points must be a numeric value corresponding to the number of pixels to sample from raster.stack")} + env.df <- sampleRandom(raster.stack, size = nb.points, na.rm = TRUE) + } else + { + env.df <- getValues(raster.stack) + if(any(is.na(env.df))) + { + env.df <- env.df[-unique(which(is.na(env.df), arr.ind = T)[, 1]), ] # Removing NAs + } + } + + + message(" - Perfoming the pca\n") + pca.object <- ade4::dudi.pca(env.df, scannf = F, nf = 2) + } + message(" - Defining the response of the species along PCA axes\n") + + if(!is.null(means)) + { + if(!is.numeric(means)) + {stop("Please provide numeric means for the gaussian function to compute probabilities of presence")} + if(!is.vector(means) | length(means) != 2) + {stop("Please provide a vector with 2 means for the gaussian function (one for each of the two pca axes)")} + } else + { + means <- pca.object$li[sample(1:nrow(pca.object$li), 1), ][1, ] + means <- c(mean1 = means[1, 1], + mean2 = means[1, 2]) + } + + + if(!is.null(sds)) + { + if(!is.numeric(sds)) + {stop("Please provide numeric standard deviations for the gaussian function to compute probabilities of presence")} + if(!is.vector(sds) | length(sds) != 2) + {stop("Please provide a vector with 2 standard deviations for the gaussian function (one for each of the two pca axes)")} + if(any(sds < 0)) + {stop("The standard deviations must have a positive value!")} + message(" - You have provided standard deviations, so argument niche.breadth will be ignored.\n") + } else + { + # Defining a range of values to determine sds for the gaussian functions + axis1 <- c(min = max(min(pca.object$li[, 1]), + quantile(pca.object$li[, 1], probs = .25) - + 5 * (quantile(pca.object$li[, 1], probs = .75) - + quantile(pca.object$li[, 1], probs = .25))), + max = min(max(pca.object$li[, 1]), + quantile(pca.object$li[, 1], probs = .75) + + 5 * (quantile(pca.object$li[, 1], probs = .75) - + quantile(pca.object$li[, 1], probs = .25)))) + axis2 <- c(min = max(min(pca.object$li[, 2]), + quantile(pca.object$li[, 2], probs = .25) - + 5 * (quantile(pca.object$li[, 2], probs = .75) - + quantile(pca.object$li[, 2], probs = .25))), + max = min(max(pca.object$li[, 2]), + quantile(pca.object$li[, 2], probs = .75) + + 5 * (quantile(pca.object$li[, 2], probs = .75) - + quantile(pca.object$li[, 2], probs = .25)))) + + # Random sampling of parameters + if(niche.breadth == "any") + { + sds <- c(sd1 = sample(seq((axis1[2] - axis1[1])/100, (axis1[2] - axis1[1])/2, length = 1000), 1), + sd2 = sample(seq((axis2[2] - axis2[1])/100, (axis2[2] - axis2[1])/2, length = 1000), 1)) + } else if (niche.breadth == "narrow") + { + sds <- c(sd1 = sample(seq((axis1[2] - axis1[1])/100, (axis1[2] - axis1[1])/10, length = 1000), 1), + sd2 = sample(seq((axis2[2] - axis2[1])/100, (axis2[2] - axis2[1])/10, length = 1000), 1)) + } else if (niche.breadth == "wide") + { + sds <- c(sd1 = sample(seq((axis1[2] - axis1[1])/10, (axis1[2] - axis1[1])/2, length = 1000), 1), + sd2 = sample(seq((axis2[2] - axis2[1])/10, (axis2[2] - axis2[1])/2, length = 1000), 1)) + } else + { + stop("niche.breadth must be one of these: 'any', 'narrow', 'wide") + } + } + + + message(" - Calculating suitability values\n") + pca.env <- calc(raster.stack[[sel.vars]], fun = function(x, ...) + {.pca.coordinates(x, pca = pca.object, na.rm = TRUE)}) + suitab.raster <- calc(pca.env, fun = function(x, ...){.prob.gaussian(x, means = means, sds = sds)}) + if(rescale) + { + suitab.raster <- (suitab.raster - suitab.raster@data@min) / (suitab.raster@data@max - suitab.raster@data@min) + } + + + if(plot) + { + if(!("null device" %in% names(dev.cur()))) dev.off() + par(mar = c(5.1, 4.1, 4.1, 2.1)) + layout(matrix(nrow = 2, ncol = 1, c(1, 2))) + + plotResponse(x = raster.stack, approach = "pca", + parameters = list(pca = pca.object, + means = means, + sds = sds)) + + image(suitab.raster, axes = T, ann = F, asp = 1, + main = "Environmental suitability of the virtual species", + las = 1, col = rev(terrain.colors(12))) + + + legend(title = "Pixel\nsuitability", "right", inset = c(-0.1, 0), + legend = c(1, 0.8, 0.6, 0.4, 0.2, 0), + fill = terrain.colors(6), bty = "n") + title("Environmental suitability of the virtual species") + } + + results <- list(approach = "pca", + details = list(variables = sel.vars, + pca = pca.object, + rescale = rescale, + axes = c(1,2), # Will be changed later if the choice of axes is implemented + means = means, + sds = sds), + suitab.raster = suitab.raster) + class(results) <- append(class(results), "virtualspecies") + return(results) +} + diff --git a/R/genericfunctions.R b/R/genericfunctions.R new file mode 100644 index 0000000..8631420 --- /dev/null +++ b/R/genericfunctions.R @@ -0,0 +1,145 @@ +#' @export +#' @method print virtualspecies +print.virtualspecies <- function(x, ...) +{ + cat(paste("Virtual species generated from", + length(x$details$variables), + "variables:\n", + paste(x$details$variables, collapse = ", "))) + cat("\n\n- Approach used:") + if(x$approach == "response") + { + cat(" Responses to each variable") + if(x$details$sp.type[1] == "mixed") + { + cat("\n- Species type: ", + x$details$sp.type[1], + ", formula = ", x$details$sp.type[2], sep = "") + } else + { + cat(paste("\n- Species type:", x$details$sp.type[1])) + } + cat("\n- Response functions:") + sapply(x$details$variables, FUN = function(y) + { + cat("\n .", y, + " [min=", x$details$parameters[[y]]$min, + "; max=", x$details$parameters[[y]]$max, + "] : ", + x$details$parameters[[y]]$fun, + " (", + paste(names(x$details$parameters[[y]]$args), + x$details$parameters[[y]]$args, sep = '=', collapse = "; "), + ")", sep = "") + }) + if (x$details$rescale.each.response) + { + cat("\n- Each response function was rescaled between 0 and 1") + } else + { + cat("\n- Response functions were not rescaled between 0 and 1") + } + if (x$details$rescale) + { + cat("\n- Environmental suitability was rescaled between 0 and 1") + } else + { + cat("\n- Environmental suitability was not rescaled between 0 and 1") + } + + } else if(x$approach == "pca") + { + cat(" Response to axes of a PCA") + cat("\n- Axes: ", + paste(x$details$axes, collapse = " & "), + "; ", round(sum(x$details$pca$eig[x$details$axes])/sum(x$details$pca$eig) * 100, 2), + "% explained by these axes") + cat("\n- Responses to axes:") + sapply(c(1, 2), function(y) + { + cat("\n .Axis ", x$details$axes[y], + " [min=", round(min(x$details$pca$li[, x$details$axes[y]]), 2), + "; max=", round(max(x$details$pca$li[, x$details$axes[y]]), 2), + "] : dnorm (mean=", x$details$means[y], "; sd=", x$details$sds[y], + ")", sep = "") + }) + if (x$details$rescale) + { + cat("\n- Environmental suitability was rescaled between 0 and 1") + } else + { + cat("\n- Environmental suitability was not rescaled between 0 and 1") + } + } + if(!is.null(x$PA.conversion)) + { + cat("\n- Converted into presence-absence:") + cat("\n .Method =", x$PA.conversion["conversion.method"]) + if(x$PA.conversion["conversion.method"] == "probability") + { + cat("\n .alpha (slope) =", x$PA.conversion["alpha"]) + cat("\n .beta (inflexion point) =", x$PA.conversion["beta"]) + cat("\n .species prevalence =", x$PA.conversion["species.prevalence"]) + } else if(x$PA.conversion["conversion.method"] == "threshold") + { + cat("\n .threshold =", x$PA.conversion["cutoff"]) + cat("\n .species prevalence =", x$PA.conversion["species.prevalence"]) + } + } + if(!is.null(x$occupied.area)) + { + if(!is.null(x$geographical.limit)) + { + cat("\n- Distribution bias introduced:") + cat("\n .method used :", x$geographical.limit$method) + if(x$geographical.limit$method %in% c("country", "region", "continent")) + { + cat("\n .area(s) :", x$geographical.limit$area) + } else if(x$geographical.limit$method == "extent") + { + cat("\n .extent : [Xmin; Xmax] = [", + x$geographical.limit$extent@xmin, "; ", + x$geographical.limit$extent@xmax, "] - [Ymin; Ymax] = [", + x$geographical.limit$extent@ymin, "; ", + x$geographical.limit$extent@ymax, "]", sep = "") + } else if(x$geographical.limit$method == "polygon") + { + cat("\n .polygon : Object of class ", class(x$geographical.limit$area), sep = "") + } + } + } +} + +#' @export +#' @method str virtualspecies +str.virtualspecies <- function(object, ...) +{ + args <- list(...) + if(is.null(args$max.level)) + { + args$max.level <- 2 + } + NextMethod("str", object = object, max.level = args$max.level) +} + +#' @export +#' @method plot virtualspecies +plot.virtualspecies <- function(x, ...) +{ + y <- raster::stack(x$suitab.raster) + names(y) <- "Suitability.raster" + if(!is.null(x$pa.raster)) + { + y <- stack(y, + x$pa.raster) + names(y)[[nlayers(y)]] <- "Presence.absence.raster" + } + if(!is.null(x$occupied.area)) + { + y <- stack(y, + x$occupied.area) + names(y)[[nlayers(y)]] <- "Occupied.area.raster" + } + x <- y + plot(x, ...) +} \ No newline at end of file diff --git a/R/limitDistribution.R b/R/limitDistribution.R new file mode 100644 index 0000000..98d1248 --- /dev/null +++ b/R/limitDistribution.R @@ -0,0 +1,254 @@ +#' Limit a virtual species distribution to a defined area +#' +#' @description +#' This function is designed to limit species distributions to a subsample of +#' their total distribution range. It will thus generate a species which is not +#' at the equilibrium with its environment (i.e., which did not occupy the full +#' range of suitable environmental conditions). +#' +#' This function basically takes any type of raster and will limit values above +#' 0 to areas where the species is allowed to disperse. +#' @param x a \code{rasterLayer} object composed of 0, 1 and NA, or the output list from +#' \code{\link{generateSpFromFun}}, \code{\link{generateSpFromPCA}} +#' or \code{\link{generateRandomSp}} +#' @param geographical.limit \code{"country"}, \code{"region"}, +#' \code{"continent"}, \code{"polygon"} or \code{"extent"}. The method used +#' to limit the distribution range: see details. +#' @param area \code{NULL}, a character string, a \code{polygon} or an \code{extent} object. +#' The area in which the distribution range will be limited: see details. If \code{NULL} +#' and \code{geographical.limit = "extent"}, then you will be asked to draw an +#' extent on the map. +#' @param plot \code{TRUE} or \code{FALSE}. If \code{TRUE}, the resulting limited +#' distribution will be plotted. +#' @details +#' \bold{How the function works:} +#' +#' The function will remove occurrences of the species outside the chosen area: +#' \itemize{ +#' \item{NA are kept unchanged} +#' \item{0 are kept unchanged} +#' \item{values > 0 within the limits of \code{area} are kept unchanged} +#' \item{values > 0 outside the limits of \code{area} are set to 0} +#' } +#' +#' +#' \bold{How to define the area in which the range is limited:} +#' +#' You can choose to limit the distribution range of the species to: +#' \enumerate{ +#' \item{a particular country, region or continent (assuming your raster has +#' the WGS84 projection): +#' +#' Set the argument +#' \code{geographical.limit} to \code{"country"}, \code{"region"} or +#' \code{"continent"}, and provide the name(s) of the associated countries, +#' regions or continents to \code{area} (see examples). +#' +#' List of possible \code{area} names: +#' \itemize{ +#' \item{Countries: type \code{levels(getMap()@@data$SOVEREIGNT)} in the console} +#' \item{Regions: "Africa", "Antarctica", "Asia", "Australia", "Europe", +#' "North America", "South America"} +#' \item{Continents: "Africa", "Antarctica", "Australia", "Eurasia", +#' "North America", "South America"}} +#' } +#' \item{a polygon: +#' +#' Set \code{geographical.limit} to \code{"polygon"}, and provide your +#' polygon to \code{area}. +#' } +#' \item{an extent object: +#' +#' Set \code{geographical.limit} to \code{"extent"}, and either provide your +#' extent object to \code{area}, or leave it \code{NULL} to draw an extent on +#' the map.} +#' } +#' @return +#' a \code{list} containing 7 elements: +#' \itemize{ +#' \item{\code{approach}: the approach used to generate the species, \emph{i.e.}, \code{"response"}} +#' \item{\code{details}: the details and parameters used to generate the species} +#' \item{\code{suitab.raster}: the virtual species distribution, as a Raster object containing the +#' environmental suitability)} +#' \item{\code{PA.conversion}: the parameters used to convert the suitability into presence-absence} +#' \item{\code{pa.raster}: the presence-absence map, as a Raster object containing 0 (absence) / 1 (presence) / NA} +#' \item{\code{geographical.limit}: the method used to +#' limit the distribution and the area in which the distribution is restricted} +#' \item{\code{occupied.area}: the area occupied by the virtual species as a +#' Raster of presence-absence} +#' } +#' The structure of the virtualspecies object can be seen using str() +#' a \code{list} containing: +#' \itemize{ +#' \item{\code{geographical.limit}: a list with two elements, the method used to +#' limit the distribution and the area in which the distribution is restricted} +#' \item{\code{occupied.area}: the area occupied by the virtual species as a +#' Raster of presence-absence} +#' } +#' @export +#' @import raster +#' @author +#' Boris Leroy \email{leroy.boris@@gmail.com} +#' +#' with help from C. N. Meynard, C. Bellard & F. Courchamp +#' @examples +#' # Create an example stack with six environmental variables +#' a <- matrix(rep(dnorm(1:100, 50, sd = 25)), +#' nrow = 100, ncol = 100, byrow = TRUE) +#' env <- stack(raster(a * dnorm(1:100, 50, sd = 25)), +#' raster(a * 1:100), +#' raster(a * logisticFun(1:100, alpha = 10, beta = 70)), +#' raster(t(a)), +#' raster(exp(a)), +#' raster(log(a))) +#' names(env) <- paste("Var", 1:6, sep = "") +#' +#' # More than 6 variables: by default a PCA approach will be used +#' sp <- generateRandomSp(env) +#' +#' # limiting the distribution to a specific extent +#' limit <- extent(0.5, 0.7, 0.6, 0.8) +#' +#' limitDistribution(sp, area = limit) + + + +limitDistribution <- function(x, + geographical.limit = "extent", + area = NULL, + plot = TRUE) +{ + + + if("virtualspecies" %in% class(x)) + { + if(class(x$pa.raster) == "RasterLayer") + { + sp.raster <- x$pa.raster + results <- x + } else stop("x must be:\n- a raster layer object\nor\n- the output list from function convertToPA(), generateSpFromFun(), generateSpFromPCA() or generateRandomSp()") + } else if ("RasterLayer" %in% class(x)) + { + sp.raster <- x + results <- list(sp.raster) + } else stop("x must be:\n- a raster layer object\nor\n- the output list from function convertToPA(), generateSpFromFun(), generateSpFromPCA() or generateRandomSp()") + + + if(length(geographical.limit) > 1) + { + stop('Only one dispersal limit method can be applied at a time') + } + + if (!(geographical.limit %in% c("country", "region", "extent", "polygon", "continent"))) + { + stop('Argument geographical.limit must be one of : country, region, continent, polygon, extent') + } + + + if (geographical.limit %in% c("country", "region", "continent")) + { + if(!("rworldmap" %in% rownames(installed.packages()))) + { + stop('You need to install the package "rworldmap" in order to use geographical.limit = "region" or "country" or "continent"') + } + worldmap <- rworldmap::getMap() + + if(geographical.limit == "country") + { + if (any(!(area %in% levels(worldmap@data$SOVEREIGNT)))) + { + stop("area name(s) must be correctly spelled. Type 'levels(getMap()@data$SOVEREIGNT)' to obtain valid names.") + } + results$geographical.limit <- list(method = geographical.limit, + area = area) + } else if (geographical.limit == "region") + { + if (any(!(area %in% levels(worldmap@data$REGION)))) + { + stop(paste("region name(s) must be correctly spelled, according to one of the following : ", + paste(levels(worldmap@data$REGION), collapse = ", "), sep = "\n")) + } + results$geographical.limit <- list(method = geographical.limit, + area = area) + } else if (geographical.limit == "continent") + { + if (any(!(area %in% levels(worldmap@data$continent)))) + { + stop(paste("region name(s) must be correctly spelled, according to one of the following : ", + paste(levels(worldmap@data$continent), collapse = ", "), sep = "\n")) + } + results$geographical.limit <- list(method = geographical.limit, + area = area) + } + } else if (geographical.limit == "polygon") # Projections are not checked here. Perhaps we should add projection check between raster & polygon in the future? + # This is especially important given that randomPoints weights samplings by the cell area (because cells closer to + # the equator are larger) + { + if(!(class(area) %in% c("SpatialPolygons", "SpatialPolygonsDataFrame"))) + { + stop("If you choose geographical.limit = 'polygon', please provide a polygon of class SpatialPolygons or SpatialPolygonsDataFrame to argument area") + } + warning("Polygon projection is not checked. Please make sure you have the same projections between your polygon and your presence-absence raster") + results$geographical.limit <- list(method = geographical.limit, + area = area) + } else if(geographical.limit == "extent") + { + if(!(class(area) == "Extent")) + { + message("No object of class extent (or wrong object) provided: click twice on the map to draw the extent in which presence points will be sampled") + plot(sp.raster) + area <- drawExtent(show = TRUE) + } + } + + geographical.limit.raster <- sp.raster + + if(geographical.limit == "country") + { + geographical.limit.raster1 <- rasterize(worldmap[which(worldmap@data$SOVEREIGNT %in% area), ], + geographical.limit.raster, + field = 1, + background = 0, + silent = TRUE) + geographical.limit.raster <- geographical.limit.raster * geographical.limit.raster1 + } else if(geographical.limit == "region") + { + geographical.limit.raster1 <- rasterize(worldmap[which(worldmap@data$REGION %in% area), ], + geographical.limit.raster, + field = 1, + background = 0, + silent = TRUE) + geographical.limit.raster <- geographical.limit.raster * geographical.limit.raster1 + } else if(geographical.limit == "continent") + { + geographical.limit.raster1 <- rasterize(worldmap[which(worldmap@data$continent %in% area), ], + geographical.limit.raster, + field = 1, + background = 0, + silent = TRUE) + geographical.limit.raster <- geographical.limit.raster * geographical.limit.raster1 + } else if(geographical.limit == "extent") + { + geographical.limit.raster <- geographical.limit.raster * rasterize(area, + sp.raster, + field = 1, + background = 0) + results$geographical.limit <- list(method = geographical.limit, + extent = area) + } else if(geographical.limit == "polygon") + { + geographical.limit.raster1 <- rasterize(area, + geographical.limit.raster, + field = 1, + background = 0, + silent = TRUE) + geographical.limit.raster <- geographical.limit.raster * geographical.limit.raster1 + } + + if(plot) + { + plot(geographical.limit.raster) + } + results$occupied.area <- geographical.limit.raster + return(results) +} \ No newline at end of file diff --git a/R/plotResponse.R b/R/plotResponse.R new file mode 100644 index 0000000..03f1142 --- /dev/null +++ b/R/plotResponse.R @@ -0,0 +1,372 @@ +#' Visualise the response of the virtual species to environmental variables +#' +#' This function plots the relationships between the virtual species and the environmental variables. +#' It requires either the output from \code{\link{generateSpFromFun}}, \code{\link{generateSpFromPCA}}, +#' \code{\link{generateRandomSp}}, +#' or a manually defined set of environmental variables and response functions. +#' +#' @param x the output from \code{\link{generateSpFromFun}}, \code{\link{generateSpFromPCA}}, +#' \code{\link{generateRandomSp}}, or +#' a raster layer/stack of environmental variables (see details for the latter). +#' @param parameters in case of manually defined response functions, a list +#' containing the associated parameters. See details. +#' @param approach in case of manually defined response functions, the chosen +#' approach: either \code{"response"} for a per-variable response approach, or +#' \code{"pca"} for a PCA approach. +#' @param rescale \code{TRUE} or \code{FALSE}. If \code{TRUE}, individual response +#' plots are rescaled between 0 and 1. +#' @param ... further arguments to be passed to \code{plot}. See +#' \code{\link[graphics]{plot}} and \code{\link[graphics]{par}}. +#' @details +#' If you provide the output from \code{\link{generateSpFromFun}}, \code{\link{generateSpFromPCA}} or +#' \code{\link{generateRandomSp}} +#' then the function will automatically make the appropriate plots. +#' +#' Otherwise, you can provide a raster layer/stack of environmental variables to +#' \code{x} and a list of functions to \code{parameters} to perform the plot. +#' In that case, you have to specifiy the \code{approach}: \code{"reponse"} or +#' \code{"PCA"}: +#' \itemize{ +#' \item{if \code{approach = "response"}: Provide to \code{parameters} a +#' \code{list} exactly as defined in \code{\link{generateSpFromFun}}:\cr +#' \code{list( +#' var1 = list(fun = 'fun1', args = list(arg1 = ..., arg2 = ..., etc.)), +#' var2 = list(fun = 'fun2', args = list(arg1 = ..., arg2 = ..., etc.)))}\cr +#' +#' } +#' \item{if \code{approach = "PCA"}: Provide to \code{parameters} a +#' \code{list} containing the following elements: +#' \itemize{ +#' \item{\code{pca}: a \code{dudi.pca} object computed with +#' \code{\link[ade4]{dudi.pca}}} +#' \item{\code{means}: a vector containing two numeric values. Will be used to define +#' the means of the gaussian response functions to the axes of the PCA.} +#' \item{\code{sds} a vector containing two numeric values. Will be used to define +#' the standard deviations of the gaussian response functions to the axes of +#' the PCA.}} +#' } +#' } +#' @export +#' @import raster +#' @author +#' Boris Leroy \email{leroy.boris@@gmail.com} +#' +#' with help from C. N. Meynard, C. Bellard & F. Courchamp +#' @examples +#' # Create an example stack with four environmental variables +#' a <- matrix(rep(dnorm(1:100, 50, sd = 25)), +#' nrow = 100, ncol = 100, byrow = TRUE) +#' env <- stack(raster(a * dnorm(1:100, 50, sd = 25)), +#' raster(a * 1:100), +#' raster(a * logisticFun(1:100, alpha = 10, beta = 70)), +#' raster(t(a))) +#' names(env) <- c("var1", "var2", "var3", "var4") +#' +#' # Per-variable response approach: +#' parameters <- formatFunctions(var1 = c(fun = 'dnorm', mean = 0.00012, +#' sd = 0.0001), +#' var2 = c(fun = 'linearFun', a = 1, b = 0), +#' var3 = c(fun = 'quadraticFun', a = -20, b = 0.2, +#' c = 0), +#' var4 = c(fun = 'logisticFun', alpha = -0.001, +#' beta = 0.002)) +#' sp1 <- generateSpFromFun(env, parameters, plot = TRUE) +#' plotResponse(sp1) +#' +#' # PCA approach: +#' sp2 <- generateSpFromPCA(env, plot = FALSE) +#' par(mfrow = c(1, 1)) +#' plotResponse(sp2) +#' + +plotResponse <- function(x, parameters = NULL, approach = NULL, rescale = TRUE, ...) +{ + if(is(x, "Raster")) + { + if(any(is.na(maxValue(x, warn = FALSE)))) + { + x <- setMinMax(x) + } + if (length(approach) > 1) {stop("Only one approach can be plotted at a time")} + if (approach == "response") + { + if (!is.list(parameters)) + { + stop("If you choose the response approach please provide the parameters") + } + if(nlayers(x) != length(parameters)) + { + stop("Provide as many layers in x as functions on parameters") + } + if(any(!(names(parameters) %in% names(x)) | !(names(x) %in% names(parameters)))) + { + stop("Layer names and names of parameters must be identical") + } + for (var in names(x)) + { + parameters[[var]]$min <- x[[var]]@data@min + parameters[[var]]$max <- x[[var]]@data@max + } + parameters + } else if (approach == "pca") + { + if(any(!(parameters$variables %in% names(x)))) + { + stop("The PCA does not seem to have been computed with the same variables + as in x.") + } + if(!is.list(parameters)) {stop("Please provide an appropriate list of parameters to draw the plots + (see the help for details)")} + if(!all(class(parameters$pca) %in% c("pca", "dudi"))) + {stop ("Please provide an appropriate pca.object (output of dudi.pca()) to make the pca plot.\n + If you don't know how to obtain the pca, try to first run generateSpFromPCA() + and provide the output to plotResponse()")} + if(!(is.numeric(parameters$means)) | !(is.numeric(parameters$sds))) + {stop ("Please provide appropriate means & standard deviations to elements 'means' and 'sds' of parameters. + If you don't know how to provide these, try to first run generateSpFromPCA() + to responsePlot()") + } + details <- parameters + } else if (is.null(approach)) + { + stop("Please choose the approach: 'response' or 'pca'.") + } + } else if ("virtualspecies" %in% class(x)) + { + if (any(!(c("approach", "details", "suitab.raster") %in% names(x)))) + { + stop("x does not seem to be a valid object: + Either provide an output from functions generateSpFromFun(), generateSpFromPCA() or generateRandomSp() or a raster object") + } + approach <- x$approach + details <- x$details + parameters <- x$details$parameters + } else + { + stop("x does not seem to be a valid object: + Either provide an output from functions generateSpFromFun(), generateSpFromPCA() or generateRandomSp() or a raster object") + } + if (approach == "response") + { + mfrow <- c(floor(sqrt(length(parameters))), + ceiling(sqrt(length(parameters)))) + if (prod(mfrow) < length(parameters)) {mfrow[1] <- mfrow[1] + 1} + par(mfrow = mfrow, + mar = c(4.1, 4.1, 0.1, 0.1)) + for(i in names(parameters)) + { + cur.seq <- seq(parameters[[i]]$min, + parameters[[i]]$max, + length = 1000) + if(rescale) + { + values <- do.call(match.fun(parameters[[i]]$fun), args = c(list(cur.seq), parameters[[i]]$args)) + values <- (values - min(values)) / (max(values) - min(values)) + # Formating plotting arguments + defaults <- list(x = cur.seq, y = values, type = "l", bty = "l", + cex.axis = .7, ylab = "Suitability", xlab = "", + las = 1, cex = .5, cex.lab = 1) + args <- modifyList(defaults, list( ...)) + do.call("plot", args) + mtext(side = 1, text = i, line = 2, cex = args$cex.lab) + } else + { + values <- do.call(match.fun(parameters[[i]]$fun), args = c(list(cur.seq), parameters[[i]]$args)) + # Formating plotting arguments + defaults <- list(x = cur.seq, + y = values, + type = "l", bty = "l", + cex.axis = .7, ylab = "Suitability", xlab = "", + las = 1, cex = .5, cex.lab = 1) + args <- modifyList(defaults, list( ...)) + do.call("plot", args) + mtext(side = 1, text = i, line = 2, cex = args$cex.lab) + } + } + } else if (approach == "pca") + { + pca.object <- details$pca + means <- details$means + sds <- details$sds + probabilities <- apply(pca.object$li, 1, .prob.gaussian, means = means, sds = sds) + probabilities <- (probabilities - min(probabilities)) / (max(probabilities) - min(probabilities)) + + xmin <- min(pca.object$li[, 1]) - 0.3 * diff(range(pca.object$li[, 1])) + xmax <- max(pca.object$li[, 1]) + ymin <- min(pca.object$li[, 2]) - 0.3 * diff(range(pca.object$li[, 2])) + ymax <- max(pca.object$li[, 2]) + + par(mar = c(4.1, 4.1, 4.1, 4.6)) + defaults <- list(x = pca.object$li, + col = c(grey(.8), rev(heat.colors(150))[51:200])[match(round(probabilities * 100, 0), 0:100)], + xlim = c(xmin, xmax), + ylim = c(ymin, ymax), + main = "PCA of environmental conditions", + bty = "n", + las = 1, cex.axis = .7, pch = 16) + args <- modifyList(defaults, list(...)) + do.call("plot", defaults) + +# points(means[2] ~ means[1], pch = 16) + polygon(sqrt((sds[1] * cos(seq(0, 2 * pi, length = 100)))^2 + (sds[2] * sin(seq(0, 2 * pi, length = 100)))^2) * + cos(atan2(sds[2] * sin(seq(0, 2 * pi, length = 100)), + sds[1] * cos(seq(0, 2 * pi, length = 100)))) + means[1], + sqrt((sds[1] * cos(seq(0, 2 * pi, length = 100)))^2 + (sds[2] * sin(seq(0, 2 * pi, length = 100)))^2) * + sin(atan2(sds[2] * sin(seq(0, 2 * pi, length = 100)), + sds[1] * cos(seq(0, 2 * pi, length = 100)))) + means[2], + col = NA, lty = 1, lwd = 1, border = NULL) + + segments(x0 = means[1] - sds[1], x1 = means[1] - sds[1], + y0 = ymin - 2 * diff(c(ymin, ymax)), + y1 = means[2], lty = 3) + + segments(x0 = means[1] + sds[1], x1 = means[1] + sds[1], + y0 = ymin - 2 * diff(c(ymin, ymax)), + y1 = means[2], lty = 3) + + segments(x0 = xmin - 2 * diff(c(xmin, xmax)), + x1 = means[1], + y0 = means[2] - sds[2], y1 = means[2] - sds[2], lty = 3) + + segments(x0 = xmin - 2 * diff(c(xmin, xmax)), + x1 = means[1], + y0 = means[2] + sds[2], y1 = means[2] + sds[2], lty = 3) + cutX <- diff(c(xmin, xmax)) * 2/3 + xmin + cutY <- diff(c(ymin, ymax)) * 2/3 + ymin + if(means[1] <= cutX & means[2] <= cutY) + { + x0 <- xmax - 0.15 * diff(c(xmin, xmax)) + y0 <- ymax - 0.15 * diff(c(ymin, ymax)) + x1 <- pca.object$co[, 1] + x0 + y1 <- pca.object$co[, 2] + y0 + } else if(means[1] > cutX & means[2] <= cutY) + { + x0 <- xmin + 0.25 * diff(c(xmin, xmax)) + y0 <- ymax - 0.15 * diff(c(ymin, ymax)) + x1 <- pca.object$co[, 1] + x0 + y1 <- pca.object$co[, 2] + y0 + } else if(means[1] <= cutX & means[2] > cutY) + { + x0 <- xmax - 0.15 * diff(c(xmin, xmax)) + y0 <- ymin + 0.25 * diff(c(ymin, ymax)) + x1 <- pca.object$co[, 1] + x0 + y1 <- pca.object$co[, 2] + y0 + } else if(means[1] > cutX & means[2] > cutY) + { + x0 <- xmin + 0.25 * diff(c(xmin, xmax)) + y0 <- ymin + 0.25 * diff(c(ymin, ymax)) + x1 <- pca.object$co[, 1] + x0 + y1 <- pca.object$co[, 2] + y0 + } + + par(xpd = T) + x1y1 <- cbind(x1, y1) + apply(x1y1, 1, FUN = function(x, a = x0, b = y0) + { + .arrows(x0 = a, y0 = b, x1 = x[1], y1 = x[2]) + }) + + .arrowLabels(x = x1, y = y1, + label = rownames(pca.object$co), clabel = 1, + origin = c(x0, y0)) + + legend(title = "Pixel\nsuitability", "topright", inset = c(-0.1, 0), + legend = c(1, 0.8, 0.6, 0.4, 0.2, 0), + pch = 16, col = c(heat.colors(150)[c(1, 21, 41, 61, 81)], grey(.8)), bty = "n") + + + par(new = T) + + valY <- dnorm(seq(xmin, + xmax, + length = 1000), + mean = means[1], + sd = sds[1]) + valY <- 0.15 * (valY - min(valY))/(max(valY) - min(valY)) + valX <- seq(xmin, + xmax, + length = 1000) + + plot(valY ~ valX, + type = "l", bty = "n", + ylim = c(0, 1), + lty = 1, + xlab = "", ylab = "", xaxt = "n", yaxt = "n") + par(new = T) + + + valY <- seq(ymin, + ymax, + length = 1000) + valX <- dnorm(seq(ymin, + ymax, + length = 1000), + mean = means[2], + sd = sds[2]) + valX <- 0.15 * (valX - min(valX))/(max(valX) - min(valX)) + plot(valX, + valY, + type = "l", bty = "n", + xlim = c(0, 1), + lty = 1, + xlab = "", ylab = "", xaxt = "n", yaxt = "n") + + } else + { + stop("The argument approach was not valid, please provide either 'response' or 'pca'") + } +} + + + +.arrows <- function(x0, y0, x1, y1, len = 0.1, ang = 15, lty = 1, + edge = TRUE) +{ + d0 <- sqrt((x0 - x1)^2 + (y0 - y1)^2) + if (d0 < 1e-07) + return(invisible()) + segments(x0, y0, x1, y1, lty = lty) + h <- strheight("A", cex = par("cex")) + if (d0 > 2 * h) { + x0 <- x1 - h * (x1 - x0)/d0 + y0 <- y1 - h * (y1 - y0)/d0 + if (edge) + arrows(x0, y0, x1, y1, angle = ang, length = len, + lty = 1) + } +} + + +.arrowLabels <- function(x, y, label, clabel, origin = c(0, 0), boxes = FALSE) +{ + xref <- x - origin[1] + yref <- y - origin[2] + for (i in 1:(length(x))) { + cha <- as.character(label[i]) + cha <- paste(" ", cha, " ", sep = "") + cex0 <- par("cex") * clabel + xh <- strwidth(cha, cex = cex0) + yh <- strheight(cha, cex = cex0) * 5/6 + if ((xref[i] > yref[i]) & (xref[i] > -yref[i])) { + x1 <- x[i] + xh/2 + y1 <- y[i] + } + else if ((xref[i] > yref[i]) & (xref[i] <= (-yref[i]))) { + x1 <- x[i] + y1 <- y[i] - yh + } + else if ((xref[i] <= yref[i]) & (xref[i] <= (-yref[i]))) { + x1 <- x[i] - xh/2 + y1 <- y[i] + } + else if ((xref[i] <= yref[i]) & (xref[i] > (-yref[i]))) { + x1 <- x[i] + y1 <- y[i] + yh + } + if (boxes) { + rect(x1 - xh/2, y1 - yh, x1 + xh/2, y1 + yh, col = "white", + border = 1) + } + text(x1, y1, cha, cex = cex0) + } +} \ No newline at end of file diff --git a/R/removeCollinearity.R b/R/removeCollinearity.R new file mode 100644 index 0000000..6330670 --- /dev/null +++ b/R/removeCollinearity.R @@ -0,0 +1,163 @@ +#' Remove collinearity among variables of a a raster stack +#' +#' This functions analyses the correlation among variables of the provideded +#' stack of environmental variables (using Pearson's R), and can return a +#' vector containing names of variables that are not intercorrelated, or a list +#' containing grouping variables according to their degree of collinearity. +#' +#' @param raster.stack a RasterStack object, in which each layer represent an environmental +#' variable. +#' @param multicollinearity.cutoff a numeric value corresponding to the cutoff +#' of correlation above which to group variables. +#' @param select.variables \code{TRUE} or \code{FALSE}. If \code{TRUE}, then the +#' function will choose one variable among each group to return a vector of +#' non correlated variables (see details). If \code{FALSE}, the function will return a list +#' containing the groups of correlated variables. +#' @param sample.points \code{TRUE} or \code{FALSE}. If you have a large +#' raster file then use this parameter to sample a number of points equal to +#' \code{nb.points}. +#' @param nb.points a numeric value. Only useful if \code{sample.points = TRUE}. +#' The number of sampled points from the raster, to perform the PCA. A too small +#' value may not be representative of the environmental conditions in your raster. +#' @param plot \code{TRUE} or \code{FALSE}. If \code{TRUE}, the hierarchical +#' ascendant classification used to group variables will be plotted. +#' @return +#' a vector of non correlated variables, or a list where each element is a +#' group of non correlated variables. +#' @details +#' This function uses the Pearson's correlation coefficient to analyse +#' correlation among variables. This coefficient is then used to compute a +#' distance matrix, which in turn is used it compute an ascendant hierarchical +#' classification, with the '\emph{complete}' method (see +#' \code{\link[stats]{hclust}}). If at least one correlation above the \code{ +#' multicollinearity.cutoff} is detected, then the variables will be grouped +#' according to their degree of correlation. +#' +#' If \code{select.variables = TRUE}, then the function will return a vector +#' containing variables that are not intercorrelated. +#' The variables not correlated to any other variables are automatically included +#' in this vector. For each group of intercorrelated variables, one variable will +#' be randomly chosen and included in this vector. +#' +#' +#' @export +#' @import raster +#' @author +#' Boris Leroy \email{leroy.boris@@gmail.com} +#' +#' with help from C. N. Meynard, C. Bellard & F. Courchamp +#' @examples +#' # Create an example stack with six environmental variables +#' a <- matrix(rep(dnorm(1:100, 50, sd = 25)), +#' nrow = 100, ncol = 100, byrow = TRUE) +#' env <- stack(raster(a * dnorm(1:100, 50, sd = 25)), +#' raster(a * 1:100), +#' raster(a * logisticFun(1:100, alpha = 10, beta = 70)), +#' raster(t(a)), +#' raster(exp(a)), +#' raster(log(a))) +#' names(env) <- paste("Var", 1:6, sep = "") +#' +#' # Defaults settings: cutoff at 0.7 +#' removeCollinearity(env, plot = TRUE) +#' +#' # Changing cutoff to 0.5 +#' removeCollinearity(env, plot = TRUE, multicollinearity.cutoff = 0.5) +#' +#' # Automatic selection of variables not intercorrelated +#' removeCollinearity(env, plot = TRUE, select.variables = TRUE) +#' +#' # Assuming a very large raster file: selecting a subset of points +#' removeCollinearity(env, plot = TRUE, select.variables = TRUE, +#' sample.points = TRUE, nb.points = 5000) +#' +#' + +removeCollinearity <- function(raster.stack, multicollinearity.cutoff = .7, + select.variables = FALSE, sample.points = FALSE, + nb.points = 10000, plot = FALSE) +{ + if(sample.points) + { + if(!is.numeric(nb.points)) + {stop("nb.points must be a numeric value corresponding to the number of pixels to sample from raster.stack")} + env.df <- sampleRandom(raster.stack, size = nb.points, na.rm = TRUE) + } else + { + env.df <- getValues(raster.stack) + if(any(is.na(env.df))) + { + env.df <- env.df[-unique(which(is.na(env.df), arr.ind = T)[, 1]), ] # Removing NAs + } + } + + if(!is.numeric(multicollinearity.cutoff)) + {stop("You must provide a numeric cutoff between 0 and 1 in multicollinearity.cutoff")} else + if(multicollinearity.cutoff > 1 | multicollinearity.cutoff < 0) + {stop("You must provide a numeric cutoff between 0 and 1 in multicollinearity.cutoff")} + + # Correlation matrix creation + cor.matrix <- matrix(data = 0, + nrow = nlayers(raster.stack), + ncol = nlayers(raster.stack), + dimnames = list(names(raster.stack), names(raster.stack))) + + # Correlation based on Pearson + cor.matrix <- 1 - abs(cor(env.df, method = "pearson")) + + # Transforming the correlation matrix into an ascendent hierarchical classification + dist.matrix <- as.dist(cor.matrix) + ahc <- stats::hclust(dist.matrix, method = "complete") + groups <- stats::cutree(ahc, h = 1 - multicollinearity.cutoff) + if(length(groups) == max(groups)) + { + message(paste(" - No multicollinearity detected in your data at threshold ", multicollinearity.cutoff, "\n", sep = "")) + mc <- FALSE + } else + { mc <- TRUE } + + if(plot) + { + par(mar = c(5.1, 5.1, 4.1, 3.1)) + plot(ahc, hang = -1, xlab = "", ylab = "Distance (1 - Pearson's r)", + main = "", las = 1, + sub = "", axes = F) + axis(2, at = seq(0, 1, length = 6), las = 1) + if(mc) + { + title(paste('Groups of intercorrelated variables at cutoff', + multicollinearity.cutoff)) + par(xpd = T) + rect.hclust(ahc, h = 1 - multicollinearity.cutoff) + } else + { + title(paste('No intercorrelation among variables at cutoff', + multicollinearity.cutoff)) + } + + } + + # Random selection of variables + if(select.variables) + { + sel.vars <- NULL + for (i in 1:max(groups)) + { + sel.vars <- c(sel.vars, sample(names(groups[groups == i]), 1)) + } + } else + { + if(mc) + { + sel.vars <- list() + for (i in groups) + { + sel.vars[[i]] <- names(groups)[groups == i] + } + } else + { + sel.vars <- names(raster.stack) + } + } + return(sel.vars) +} diff --git a/R/sampleOccurrences.R b/R/sampleOccurrences.R new file mode 100644 index 0000000..b968765 --- /dev/null +++ b/R/sampleOccurrences.R @@ -0,0 +1,562 @@ +#' Sample occurrences in a virtual species distribution +#' +#' @description +#' This function samples presences within a species distribution, either +#' randomly or with a sampling bias. The sampling bias can be defined manually +#' or with a set of pre-defined biases. +#' +#' @param x a \code{rasterLayer} object or the output list from +#' \code{generateSpFromFun}, \code{generateSpFromPCA}, \code{generateRandomSp}, \code{convertToPA} +#' or \code{limitDistribution} +#' The raster must contain values of 0 or 1 (or NA). +#' @param n an integer. The number of occurrence points to sample. +#' @param type \code{"presence only"} or \code{"presence-absence"}. The type of +#' occurrence points to sample. +#' @param sampling.area a character string, a \code{polygon} or an \code{extent} object. +#' The area in which the sampling will take place. See details. +#' @param detection.probability a numeric value between 0 and 1, corresponding to the +#' probability of detection of the species. See details. +#' @param correct.by.suitability \code{TRUE} or \code{FALSE}. If \code{TRUE}, then +#' the probability of detection will be weighted by the suitability, such that +#' cells with lower suitabilities will further decrease the chance that the species +#' is detected when sampled. +#' @param error.probability \code{TRUE} or \code{FALSE}. Only useful if +#' \code{type = "presence-absence"}. Probability to attribute an erroneous presence +#' in cells where the species is absent. +#' @param bias \code{"no.bias"}, \code{"country"}, \code{"region"}, +#' \code{"extent"}, \code{"polygon"} or \code{"manual"}. The method used to +#' generate a sampling bias: see details. +#' @param bias.strength a positive numeric value. The strength of the bias to be applied +#' in \code{area} (as a multiplier). Above 1, \code{area} will be oversampled. Below 1, \code{area} +#' will be undersampled. +#' @param bias.area \code{NULL}, a character string, a \code{polygon} or an \code{extent} object. +#' The area in which the sampling will be biased: see details. If \code{NULL} +#' and \code{bias = "extent"}, then you will be asked to draw an +#' extent on the map. +#' @param weights \code{NULL} or a raster layer. Only used if \code{bias = "manual"}. +#' The raster of bias weights to be applied to the sampling of occurrences. +#' Higher weights mean a higher probability of sampling. +#' @param sample.prevalence \code{NULL} or a numeric value between 0 and 1. Only useful if +#' \code{type = "presence-absence"}. Defines the sample prevalence, i.e. the proportion of presences +#' sampled. Note that the probabilities of detection and error are applied AFTER this parameter, +#' so the final sample prevalence may not different if you apply probabilities of detection and/or error +#' @param plot \code{TRUE} or \code{FALSE}. If \code{TRUE}, the sampled occurrence +#' points will be plotted. +#' @details +#' \bold{How the function works:} +#' +#' The function randomly selects \code{n} cells in which samples occur. If a \code{bias} +#' is chosen, then the selection of these cells will be biased according to the type and +#' strength of bias chosen. If the sampling is of \code{type "presence only"}, then only +#' cells where the species is present will be chosen. If the sampling is of +#' \code{type "presence-absence"}, then all non-NA cells can be chosen. +#' +#' The function then samples the species inside the chosen cells. In cells +#' where the species is present the species will always be sampled unless +#' the parameter \code{detection.probability} is lower than 1. In that case the +#' species will be sampled with the associated probability of detection. +#' +#' In cells where the species is absent (in case of a \code{"presence-absence"} +#' sampling), the function will always assign absence unless \code{error.probability} +#' is greater than 1. In that case, the species can be found present with the +#' associated probability of error. Note that this step happens AFTER the detection +#' step. Hence, in cells where the species is present but not detected, it can +#' still be sampled due to a sampling error. +#' +#' \bold{How to restrict the sampling area:} +#' +#' Use the argument \code{sampling.area}: +#' \itemize{ +#' \item{Provide the name (s) (or a combination of names) of country(ies), region(s) or continent(s). +#' Examples: +#' \itemize{ +#' \item{\code{sampling.area = "Africa"}} +#' \item{\code{sampling.area = c("Africa", "North America", "France")}} +#' }} +#' \item{Provide a polygon (\code{SpatialPolygons} or \code{SpatialPolygonsDataFrame} +#' of package \code{sp})} +#' \item{Provide an \code{extent} object} +#' } +#' +#' \bold{How the sampling bias works:} +#' +#' The argument \code{bias.strength} indicates the strength of the bias. +#' For example, a value of 50 will result in 50 times more samples within the +#' \code{bias.area} than outside. +#' Conversely, a value of 0.5 will result in half less samples within the +#' \code{bias.area} than outside. +#' +#' \bold{How to choose where the sampling is biased:} +#' +#' You can choose to bias the sampling in: +#' \enumerate{ +#' \item{a particular country, region or continent (assuming your raster has +#' the WGS84 projection): +#' +#' Set the argument +#' \code{bias} to \code{"country"}, \code{"region"} or +#' \code{"continent"}, and provide the name(s) of the associated countries, +#' regions or continents to \code{bias.area} (see examples). +#' +#' List of possible \code{bias.area} names: +#' \itemize{ +#' \item{Countries: type \code{levels(getMap()@@data$SOVEREIGNT)} in the console} +#' \item{Regions: "Africa", "Antarctica", "Asia", "Australia", "Europe", +#' "North America", "South America"} +#' \item{Continents: "Africa", "Antarctica", "Australia", "Eurasia", +#' "North America", "South America"}} +#' } +#' \item{a polygon: +#' +#' Set \code{bias} to \code{"polygon"}, and provide your +#' polygon to \code{area}. +#' } +#' \item{an extent object: +#' +#' Set \code{bias} to \code{"extent"}, and either provide your +#' extent object to \code{bias.area}, or leave it \code{NULL} to draw an extent on +#' the map.} +#' } +#' +#' Otherwise you can define manually your sampling bias, \emph{e.g.} to create +#' biases along roads. In that case you have to provide to \code{weights} a raster layer in which +#' each cell contains the probability to be sampled. +#' @return a \code{list} with 3 (unbiased sampling) to 4 (biased sampling) elements: +#' \itemize{ +#' \item{\code{sample.points}: the data.frame containing the coordinates of +#' samples, the real presence-absences (or presence-only) and the sampled presence- +#' absences} +#' \item{\code{detection.probability}: the chosen probability of detection of +#' the virtual species} +#' \item{\code{error.probability}: the chosen probability to assign presence +#' in cells where the species is absent} +#' \item{\code{bias}: if a bias was chosen, then the type of bias and the +#' associated \code{area} will be included.} +#' } +#' @export +#' @import raster +#' @author +#' Boris Leroy \email{leroy.boris@@gmail.com} +#' +#' with help from C. N. Meynard, C. Bellard & F. Courchamp +#' @examples +#' # Create an example stack with six environmental variables +#' a <- matrix(rep(dnorm(1:100, 50, sd = 25)), +#' nrow = 100, ncol = 100, byrow = TRUE) +#' env <- stack(raster(a * dnorm(1:100, 50, sd = 25)), +#' raster(a * 1:100), +#' raster(a * logisticFun(1:100, alpha = 10, beta = 70)), +#' raster(t(a)), +#' raster(exp(a)), +#' raster(log(a))) +#' names(env) <- paste("Var", 1:6, sep = "") +#' +#' # More than 6 variables: by default a PCA approach will be used +#' sp <- generateRandomSp(env, niche.breadth = "wide") +#' +#' # Sampling of 25 presences +#' sampleOccurrences(sp, n = 25) +#' +#' # Sampling of 30 presences and absebces +#' sampleOccurrences(sp, n = 30, type = "presence-absence") +#' +#' # Reducing of the probability of detection +#' sampleOccurrences(sp, n = 30, type = "presence-absence", +#' detection.probability = 0.5) +#' +#' # Further reducing in relation to environmental suitability +#' sampleOccurrences(sp, n = 30, type = "presence-absence", +#' detection.probability = 0.5, +#' correct.by.suitability = TRUE) +#' +#' # Creating sampling errors (far too much) +#' sampleOccurrences(sp, n = 30, type = "presence-absence", +#' error.probability = 0.5) +#' +#' # Introducing a sampling bias (oversampling) +#' biased.area <- extent(0.5, 0.7, 0.6, 0.8) +#' sampleOccurrences(sp, n = 50, type = "presence-absence", +#' bias = "extent", +#' bias.area = biased.area) +#' # Showing the area in which the sampling is biased +#' plot(biased.area, add = TRUE) +#' +#' # Introducing a sampling bias (no sampling at all in the chosen area) +#' biased.area <- extent(0.5, 0.7, 0.6, 0.8) +#' sampleOccurrences(sp, n = 50, type = "presence-absence", +#' bias = "extent", +#' bias.strength = 0, +#' bias.area = biased.area) +#' # Showing the area in which the sampling is biased +#' plot(biased.area, add = TRUE) + + + +sampleOccurrences <- function(x, n, + type = "presence only", + sampling.area = NULL, + detection.probability = 1, + correct.by.suitability = FALSE, + error.probability = 0, + bias = "no.bias", + bias.strength = 50, + bias.area = NULL, + weights = NULL, + sample.prevalence = NULL, + plot = TRUE) +{ + results <- list() + + if("virtualspecies" %in% class(x)) + { + if("RasterLayer" %in% class(x$occupied.area)) + { + sp.raster <- x$occupied.area + } else if("RasterLayer" %in% class(x$pa.raster)) + { + sp.raster <- x$pa.raster + } else stop("x must be:\n- a raster layer object\nor\n- the output list from functions + generateRandomSp(), convertToPA() or limitDistribution()") + } else if ("RasterLayer" %in% class(x)) + { + sp.raster <- x + } else stop("x must be:\n- a raster layer object\nor\n- the output list from functions + generateRandomSp(), convertToPA() or limitDistribution()") + + if(sp.raster@data@max > 1 | sp.raster@data@min < 0) + { + stop("There are values above 1 or below 0 in your presence/absence raster. + Please make sure that the provided raster is a correct P/A raster and not a suitability raster.") + } + + original.raster <- sp.raster + + if(!is.null(sample.prevalence)) + { + if(sample.prevalence < 0 | sample.prevalence > 1) + { + stop("Sample prevalence must be a numeric between 0 and 1") + } + } + + if(!is.null(sampling.area)) + { + if(is.character(sampling.area)) + { + worldmap <- rworldmap::getMap() + if (any(!(sampling.area %in% c(levels(worldmap@data$SOVEREIGNT), + levels(worldmap@data$REGION), + levels(worldmap@data$continent))))) + { + stop("The choosen sampling.area is incorrectly spelled.\n Type 'levels(getMap()@data$SOVEREIGNT)', 'levels(worldmap@data$REGION)' and levels(worldmap@data$continent) to obtain valid names.") + } + sampling.area <- worldmap[which(worldmap@data$SOVEREIGNT %in% sampling.area | + worldmap@data$REGION %in% sampling.area | + worldmap@data$continent %in% sampling.area), ] + } else if(!(class(sampling.area) %in% c("SpatialPolygons", "SpatialPolygonsDataFrame", "Extent"))) + { + stop("Please provide to sampling.area either \n + - the names of countries, region and/or continents in which to sample\n + - a SpatialPolygons or SpatialPolygonsDataFrame\n + - an extent\n + in which the sampling will take place") + } + + sample.area.raster1 <- rasterize(sampling.area, + sp.raster, + field = 1, + background = NA, + silent = TRUE) + sp.raster <- sp.raster * sample.area.raster1 + } + + + if(correct.by.suitability) + { + if(!("virtualspecies" %in% class(x)) | !("suitab.raster" %in% names(x))) + { + stop("If you choose to weight the probability of detection by the suitability of the species (i.e., correct.by.suitability = TRUE), + then you need to provide an appropriate virtual species containing a suitability raster to x.") + } + } + + if(!is.numeric(detection.probability) | detection.probability > 1 | detection.probability < 0) + { + stop("detection.probability must be a numeric value between 0 and 1") + } + + if(!is.numeric(error.probability) | error.probability > 1 | error.probability < 0) + { + stop("error.probability must be a numeric value between 0 and 1") + } + + if(length(bias) > 1) + { + stop('Only one bias can be applied at a time') + } + + if (!(bias %in% c("no.bias", "country", "region", "continent", "extent", "polygon", "manual"))) + { + stop('Argument bias must be one of : "no.bias", "country", "region", "continent", "extent", "polygon", "manual"') + } + + if(!is.numeric(bias.strength) & bias != "no.bias") + { + stop("Please provide a numeric value for bias.strength") + } + + if (bias %in% c("country", "region", "continent")) + { + if(!("rworldmap" %in% rownames(installed.packages()))) + { + stop('You need to install the package "rworldmap" in order to use bias = "region" or bias = "country"') + } + worldmap <- rworldmap::getMap() + + if(bias == "country") + { + if (any(!(bias.area %in% levels(worldmap@data$SOVEREIGNT)))) + { + stop("country name(s) must be correctly spelled. Type 'levels(getMap()@data$SOVEREIGNT)' to obtain valid names.") + } + results$bias <- list(bias = bias, + bias.strength = bias.strength, + bias.area = bias.area) + } else if (bias == "region") + { + if (any(!(bias.area %in% levels(worldmap@data$REGION)))) + { + stop(paste("region name(s) must be correctly spelled, according to one of the following : ", + paste(levels(worldmap@data$REGION), collapse = ", "), sep = "\n")) + } + results$bias <- list(bias = bias, + bias.strength = bias.strength, + bias.area = bias.area) + } else if (bias == "continent") + { + if (any(!(bias.area %in% levels(worldmap@data$continent)))) + { + stop(paste("region name(s) must be correctly spelled, according to one of the following : ", + paste(levels(worldmap@data$continent), collapse = ", "), sep = "\n")) + } + results$bias <- list(bias = bias, + bias.strength = bias.strength, + bias.area = bias.area) + } + } + if (bias == "polygon") # Projections are not checked here. Perhaps we should add projection check between raster & polygon in the future? + # This is especially important given that randomPoints weights samplings by the cell area (because cells closer to + # the equator are larger) + { + if(!(class(bias.area) %in% c("SpatialPolygons", "SpatialPolygonsDataFrame"))) + { + stop("If you choose bias = 'polygon', please provide a polygon of class SpatialPolygons or SpatialPolygonsDataFrame to argument bias.area") + } + warning("Polygon projection is not checked. Please make sure you have the same projections between your polygon and your presence-absence raster") + results$bias <- list(bias = bias, + bias.strength = bias.strength, + bias.area = bias.area) + } + + if (bias == "extent") + { + results$bias <- list(bias = bias, + bias.strength = bias.strength, + bias.area = bias.area) + } + + if(type == "presence-absence") + { + sample.raster <- sp.raster + sample.raster[!is.na(sample.raster)] <- 1 + } else if (type == "presence only") + { + sample.raster <- sp.raster + } else stop("type must either be 'presence only' or 'presence-absence'") + + if (bias == "manual") + { + if(!("RasterLayer" %in% class(weights))) + { + stop("You must provide a raster layer of weights (to argument weights) if you choose bias == 'manual'") + } + bias.raster <- weights * sample.raster + results$bias <- list(bias = bias, + weights = weights) + } else + { + bias.raster <- sample.raster + } + + if(bias == "country") + { + bias.raster1 <- rasterize(worldmap[which(worldmap@data$SOVEREIGNT %in% bias.area), ], + bias.raster, + field = bias.strength, + background = 1, + silent = TRUE) + bias.raster <- bias.raster * bias.raster1 + } else if(bias == "region") + { + bias.raster1 <- rasterize(worldmap[which(worldmap@data$REGION %in% bias.area), ], + bias.raster, + field = bias.strength, + background = 1, + silent = TRUE) + bias.raster <- bias.raster * bias.raster1 + } else if(bias == "continent") + { + bias.raster1 <- rasterize(worldmap[which(levels(worldmap@data$continent) %in% bias.area), ], + bias.raster, + field = bias.strength, + background = 1, + silent = TRUE) + bias.raster <- bias.raster * bias.raster1 + } else if(bias == "extent") + { + if(!(class(bias.area) == "Extent")) + { + message("No object of class extent provided: click twice on the map to draw the extent in which presence points will be sampled") + plot(sp.raster) + bias.area <- drawExtent(show = TRUE) + } + bias.raster <- bias.raster * rasterize(bias.area, sp.raster, field = bias.strength, background = 1) + results$bias <- list(bias = bias, + bias.area = bias.area) + } else if(bias == "polygon") + { + bias.raster1 <- rasterize(bias.area, + bias.raster, + field = bias.strength, + background = 1, + silent = TRUE) + bias.raster <- bias.raster * bias.raster1 + } + + + if(bias != "no.bias") + { + if(type == "presence only") + { + sample.points <- dismo::randomPoints(sample.raster * bias.raster, n = n, prob = TRUE, tryf = 2) + } else + { + if(is.null(sample.prevalence)) + { + sample.points <- dismo::randomPoints(sample.raster * bias.raster, n = n, prob = TRUE) + } else + { + tmp1 <- sample.points + tmp1[sp.raster != 1] <- NA + sample.points <- dismo::randomPoints(tmp1 * bias.raster, n = sample.prevalence * n, prob = TRUE) + tmp1 <- sample.raster + tmp1[sp.raster != 0] <- NA + sample.points <- rbind(sample.points, + dismo::randomPoints(tmp1 * bias.raster, n = (1 - sample.prevalence) * n, prob = TRUE)) + rm(tmp1) + } + } + } else + { + if(type == "presence only") + { + sample.points <- dismo::randomPoints(sample.raster, n = n, prob = TRUE, tryf = 2) + } else + { + if(is.null(sample.prevalence)) + { + sample.points <- dismo::randomPoints(sample.raster, n = n, prob = TRUE) + } else + { + tmp1 <- sample.raster + tmp1[sp.raster != 1] <- NA + sample.points <- dismo::randomPoints(tmp1, n = sample.prevalence * n, prob = TRUE) + tmp1 <- sample.raster + tmp1[sp.raster != 0] <- NA + sample.points <- rbind(sample.points, + dismo::randomPoints(tmp1, n = (1 - sample.prevalence) * n, prob = TRUE)) + rm(tmp1) + } + } + } + + if(type == "presence only") + { + if(error.probability != 0) + { + warning("The error probability has no impact when sampling 'presence only' + points, because these samplings occur only within the boundaries of the species range") + } + sample.points <- data.frame(sample.points, + Real = 1, + Observed = sample(c(NA, 1), + size = nrow(sample.points), + prob = c(1 - detection.probability, + detection.probability), + replace = TRUE)) + } else if(type == "presence-absence") + { + sample.points <- data.frame(sample.points, + Real = extract(sp.raster, sample.points)) + + if(correct.by.suitability) + { + suitabs <- extract(x$suitab.raster, sample.points[, c("x", "y")]) + } else { suitabs <- rep(1, nrow(sample.points)) } + + sample.points$Observed <- NA + if(correct.by.suitability) + { + sample.points$Observed[which(sample.points$Real == 1)] <- + sapply(detection.probability * suitabs[which(sample.points$Real == 1)], + function(y) + { + sample(c(0, 1), + size = 1, + prob = c(1 - y, y)) + }) + } else + { + sample.points$Observed[which(sample.points$Real == 1)] <- + sample(c(0, 1), size = length(which(sample.points$Real == 1)), + prob = c(1 - detection.probability, detection.probability), + replace = TRUE) + } + sample.points$Observed[which(sample.points$Real == 0 | sample.points$Observed == 0)] <- + sample(c(0, 1), size = length(which(sample.points$Real == 0 | sample.points$Observed == 0)), + prob = c(1 - error.probability, error.probability), + replace = TRUE) + + } + + if(plot) + { + plot(original.raster) + if(type == "presence only") + { + points(sample.points[, c("x", "y")], pch = 16, cex = .5) + } else + { + points(sample.points[sample.points$Observed == 1, c("x", "y")], pch = 16, cex = .8) + points(sample.points[sample.points$Observed == 0, c("x", "y")], pch = 1, cex = .8) + } + } + + + results$sample.points <- sample.points + results$detection.probability <- detection.probability + results$error.probability <- error.probability + if(type == "presence-absence") + { + + true.prev <- length(sample.points$Real[which(sample.points$Real == 1)]) / nrow(sample.points) + obs.prev <- length(sample.points$Real[which(sample.points$Observed == 1)]) / nrow(sample.points) + + results$sample.prevalence <- c(true.sample.prevalence = true.prev, + observed.sample.prevalence = obs.prev) + } + + + class(results) <- append(class(results), "VSSampledPoints") + return(results) +} \ No newline at end of file diff --git a/R/sp.env.functions.R b/R/sp.env.functions.R new file mode 100644 index 0000000..af0d349 --- /dev/null +++ b/R/sp.env.functions.R @@ -0,0 +1,108 @@ +#' Linear function +#' @description A simple linear function of the form +#' \deqn{ax+b}{a*x+b} +#' @param x a numeric value or vector +#' @param a a numeric value or vector +#' @param b a numeric value or vector +#' @return a numeric value or vector resulting from the function +#' @export +#' @seealso \code{\link{logisticFun}}, \code{\link{quadraticFun}} +#' @author +#' Boris Leroy \email{leroy.boris@@gmail.com} +#' +#' Maintainer: Boris Leroy \email{leroy.boris@@gmail.com} +#' @examples +#' x <- 1:100 +#' y <- linearFun(x, a = 0.5, b = 0) +#' plot(y ~ x, type = "l") +linearFun <- function(x, a, b) {a * x + b} + +#' Logistic function +#' +#' @description A simple logistic function of the form +#' \deqn{\frac{1}{{1 + e^{\frac{x - \beta}{\alpha}}}}}{ +#' 1 / (1 + exp((x - \beta)/\alpha))} +#' @param x a numeric value or vector +#' @param alpha a numeric value or vector +#' @param beta a numeric value or vector +#' @return a numeric value or vector resulting from the function +#' @export +#' @seealso \code{\link{linearFun}}, \code{\link{quadraticFun}} +#' @author +#' Boris Leroy \email{leroy.boris@@gmail.com} +#' +#' Maintainer: Boris Leroy \email{leroy.boris@@gmail.com} +#' @details +#' The value of \code{beta} determines the 'threshold' of the logistic curve +#' (i.e. the inflexion point). +#' +#' The value of \code{alpha} determines the slope of the curve (see examples): +#' \itemize{ +#' \item{\code{alpha} very close to 0 will result in a threshold-like response.} +#' \item{Values of \code{alpha} with the same order of magnitude as the range of +#' \code{x} (e.g., the range of\code{x} / 10) will result in a +#' logistic function.} +#' \item{\code{alpha} very far from 0 will result in a linear function.} +#' } +#' @examples +#' x <- 1:100 +#' y <- logisticFun(x, alpha = -10, b = 50) +#' plot(y ~ x, type = "l") +#' +#' # The effect of alpha: +#' y1 <- logisticFun(x, alpha = -0.01, b = 50) +#' y2 <- logisticFun(x, alpha = -10, b = 50) +#' y3 <- logisticFun(x, alpha = -1000, b = 50) +#' +#' par(mfrow = c(1, 3)) +#' plot(y1 ~ x, type = "l", main = expression(alpha %->% 0)) +#' plot(y2 ~ x, type = "l", main = expression(alpha %~~% range(x)/10)) +#' plot(y3 ~ x, type = "l", main = expression(alpha %->% infinity)) +logisticFun <- function(x, alpha, beta) {1 / (1 + exp((x - beta)/alpha))} + +#' Quadratic function +#' +#' @description A simple quadratic function of the form +#' \deqn{ax^2+bx+c}{ +#' a*x^2+b*x+c} +#' @param x a numeric value or vector +#' @param a a numeric value or vector +#' @param b a numeric value or vector +#' @param c a numeric value or vector +#' @return a numeric value or vector resulting from the function +#' @export +#' @seealso \code{\link{linearFun}}, \code{\link{quadraticFun}} +#' @author +#' Boris Leroy \email{leroy.boris@@gmail.com} +#' +#' Maintainer: Boris Leroy \email{leroy.boris@@gmail.com} +#' @examples +#' x <- 1:100 +#' y <- quadraticFun(x, a = 2, b = 2, c = 3) +#' plot(y ~ x, type = "l") +quadraticFun <- function(x, a, b, c) {a * x^2 + b * x + c} + + +# Functions useful for the PCA approach + +.f <- function(x, co) x %*% co + +.pca.coordinates <- function(x, pca, na.rm) +{ + x <- sweep(x, 2L, pca$cent, check.margin=FALSE) + x <- sweep(x, 2L, pca$norm, "/", check.margin=FALSE) + x1 <- apply(x, 1, .f, co = pca$c1[, 1]) + x2 <- apply(x, 1, .f, co = pca$c1[, 2]) + return(cbind(x1, x2)) +} + +.prob.gaussian <- function(x, means, sds) +{ + dnorm(x[1], mean = means[1], sd = sds[1]) * dnorm(x[2], mean = means[2], sd = sds[2]) +} + + +.thermalFun <- function(Pmax, Tb, To, rho, sigma) +{ + Pmax * exp(-exp(rho * (Tb - To) - 6) - sigma * (Tb - To)^2) +} diff --git a/R/synchroniseNA.R b/R/synchroniseNA.R new file mode 100644 index 0000000..0cd141a --- /dev/null +++ b/R/synchroniseNA.R @@ -0,0 +1,49 @@ +#' Synchronise NA values among layers of a stack +#' +#' @description +#' This function ensures that cells containing NAs are the same among all the +#' layers of a raster stack, i.e.that for any given pixel of the stack, if one layer has a NA, then +#' all layers should be set to NA for that pixel. +#' @details +#' This function can do that in two different ways; if your computer has enough RAM a fast way will be +#' used; otherwise a slower but memory-safe way will be used. +#' @param x a raster stack object which needs to be synchronised. +#' @export +#' @import raster +#' @author +#' Boris Leroy \email{leroy.boris@@gmail.com} +#' +#' with help from C. N. Meynard, C. Bellard & F. Courchamp +#' @examples +#' # Creation of a stack with different NAs across layers +#' m <- matrix(nr = 10, nc = 10, 1:100) +#' r1 <- raster(m) +#' r2 <- raster(m) +#' r1[sample(1:ncell(r1), 20)] <- NA +#' r2[sample(1:ncell(r2), 20)] <- NA +#' s <- stack(r1, r2) +#' +#' +#' # Effect of the synchroniseNA() function +#' plot(s) # Not yet synchronised +#' s <- synchroniseNA(s) +#' plot(s) # Synchronised +#' +synchroniseNA <- function(x) +{ + if(canProcessInMemory(x, n = 2)) + { + val <- getValues(x) + if(any(is.na(val))) + { + NA.pos <- unique(which(is.na(val), arr.ind = T)[, 1]) + } + val[NA.pos, ] <- NA + x <- setValues(x, val) + return(x) + } else + { + x <- mask(x, calc(x, fun = sum)) + return(x) + } +} \ No newline at end of file diff --git a/R/virtualspecies-package.R b/R/virtualspecies-package.R new file mode 100644 index 0000000..5a4a0ec --- /dev/null +++ b/R/virtualspecies-package.R @@ -0,0 +1,64 @@ +#' Generation of virtual species +#' +#' This package allows generating virtual species distributions, for example +#' for testing species distribution modelling protocols. For a complete tutorial, +#' see http://borisleroy.com/virtualspecies +#' +#' +#' @details +#' The process of generating a virtual species distribution is divided into +#' four major steps. +#' \enumerate{ +#' \item{Generate a virtual species distributions from environmental variables. +#' This can be done by +#' \itemize{ +#' \item{defining the response functions to each environmental +#' variables with \code{\link{generateSpFromFun}}} +#' \item{computing a PCA among +#' environmental variables, and simulating the response of the species along +#' the two first axes of the PCA with \code{\link{generateSpFromPCA}}}} +#' This step can be entirely randomised with \code{\link{generateRandomSp}} +#' } +#' \item{Convert the virtual species distribution into presence-absence, with +#' \code{\link{convertToPA}}} +#' \item{Facultatively, introduce a distribution bias with +#' \code{\link{limitDistribution}}} +#' \item{Sample occurrence points (presence only or presence-absence) inside the +#' virtual species distribution, either randomly or with biases, with +#' \code{\link{sampleOccurrences}}} +#' } +#' +#' There are other useful functions in the package: +#' \itemize{ +#' \item{\code{\link{formatFunctions}}: this is a helper function to format and +#' illustrate the response functions as a correct input for \code{\link{generateSpFromFun}}} +#' \item{\code{\link{plotResponse}}: to visualise the species-environment relationship +#' of the virtual species} +#' \item{\code{\link{removeCollinearity}}: this function can be used to remove +#' collinearity among variables of a stack by selecting a subset of +#' non-intercorrelated variables} +#' \item{\code{\link{synchroniseNA}}: this function can be used to synchronise +#' NA values among layers of a stack} +#' } +#' +#' +#' +#' +#' This packages makes use of different other packages: +#' \itemize{ +#' \item{This package makes extensive use of the package \code{\link{raster}} to obtain spatialised +#' environmental variables, and produce spatialised virtual species distributions.} +#' \item{\code{\link{dismo}} is used to sample occurrence data from the generated virtual species.} +#' \item{\code{\link{ade4}} is used to generate species with a PCA approach.} +#' \item{\code{\link{rworldmap}} is used to obtain free world shapefiles, in order to create +#' dispersal limitations and sampling biases.} +#' } +#' @author +#' Boris Leroy \email{leroy.boris@@gmail.com} +#' +#' with help from C. N. Meynard, C. Bellard & F. Courchamp +#' +#' Maintainer: Boris Leroy \email{leroy.boris@@gmail.com} +#' @name virtualspecies-package +#' @docType package +NULL \ No newline at end of file diff --git a/man/convertToPA.Rd b/man/convertToPA.Rd new file mode 100644 index 0000000..9d9dbbd --- /dev/null +++ b/man/convertToPA.Rd @@ -0,0 +1,160 @@ +% Generated by roxygen2 (4.0.2): do not edit by hand +\name{convertToPA} +\alias{convertToPA} +\title{Convert a virtual species distribution (or a suitability raster) into presence-absence} +\usage{ +convertToPA(x, PA.method = "probability", beta = "random", alpha = -0.05, + species.prevalence = NULL, plot = TRUE) +} +\arguments{ +\item{x}{a suitability raster, or the output from functions +\code{\link{generateSpFromFun}}, \code{\link{generateSpFromPCA}} +or \code{\link{generateRandomSp}}} + +\item{PA.method}{\code{"threshold"} or \code{"probability"}. If +\code{"threshold"}, then occurrence probabilities are simply converted into +presence-absence according to the threshold \code{beta}. If \code{"probability"}, then +probabilities are converted according to a logistic function of threshold +\code{beta} and slope \code{alpha}.} + +\item{beta}{\code{"random"}, a numeric value in the range of your +probabilities or \code{NULL}. This is the threshold of conversion into +presence-absence (= the inflexion point if \code{PA.method = "probability"}). +If \code{"random"}, a numeric value will be randomly generated within the range +of \code{x}.} + +\item{alpha}{\code{NULL} or a negative numeric value. Only useful if +\code{PA.method = "probability"}. The value of \code{alpha} will +shape the logistic function transforming occurrences into presence-absences. +See \code{\link{logisticFun}} and examples therein for the choice of \code{alpha}} + +\item{species.prevalence}{\code{NULL} or a numeric value between 0 and 1. +The species prevalence is the proportion of sites actually occupied by the +species.} + +\item{plot}{\code{TRUE} or \code{FALSE}. If \code{TRUE}, maps of probabilities +of occurrence and presence-absence will be plotted.} +} +\value{ +a \code{list} containing 5 elements: +\itemize{ +\item{\code{approach}: the approach used to generate the species, \emph{i.e.}, \code{"response"}} +\item{\code{details}: the details and parameters used to generate the species} +\item{\code{suitab.raster}: the virtual species distribution, as a Raster object containing the +environmental suitability)} +\item{\code{PA.conversion}: the parameters used to convert the suitability into presence-absence} +\item{\code{pa.raster}: the presence-absence map, as a Raster object containing 0 (absence) / 1 (presence) / NA} +} +The structure of the virtualspecies object can be seen using str() +} +\description{ +This functions converts the probabilities of presence from the output of + \code{\link{generateSpFromFun}}, \code{\link{generateSpFromPCA}}, \code{\link{generateRandomSp}} +or a suitability raster into +a presence-absence raster. The conversion can be threshold-based, or based +on a probability of conversion (see details). +} +\details{ +The conversion of probabilities of occurrence into presence - absence is +usually performed by selecting a threshold above which presence always occurs, +and never below. However, this approach may be too much unrealistic because +species may sometime be present in areas with a low probability of occurrence, +or be absent from areas with a high probability of occurrence. In addition, +when using a threshold you erase the previously generated response shapes: +it all becomes threshold. Thus, this threshold approach should be avoided. + + +Hence, a more +realistic conversion consists in converting probabilities into presence - +absence with a probability function (see references). Such a probability +conversion can be performed here with a logit function +(see \code{\link{logisticFun}}). + +To perform the probability conversion you have to define two of the +three following parameters: +\itemize{ +\item{\code{beta}: the 'threshold' of the logistic function (i.e. the +inflexion point)} +\item{\code{alpha}: the slope of the logistic function} +\item{\code{species.prevalence}: the proportion of sites in which the species +occur} +} + +If you provide \code{beta} and \code{alpha}, the \code{species.prevalence} +is calculated immediately calculated after conversion into presence-absence. + +On the other hand, if you provide \code{species.prevalence} and either +\code{beta} or \code{alpha}, the function will try to determine \code{alpha} +(if you provided \code{beta}) or \code{beta} (if you provided \code{alpha}). + +The relationship between species prevalence, alpha and beta is dependent +on the available range of environmental conditions (see Meynard and Kaplan, +2011 and especially the Supporting Information). As a consequence, the +desired species prevalence may not be available for the defined \code{alpha} +or \code{beta}. In these conditions, the function will retain the \code{alpha} or +\code{beta} which provides the closest prevalence to your \code{species.prevalence}, +but you may also provide another value of \code{alpha} or \code{beta} to obtain +a closer prevalence. + +In all cases, the \code{species.prevalence} indicated in the output is the +prevalence measured on the output presence-absence map. +} +\note{ +The approximation of \code{alpha} or \code{beta} to the chosen +\code{species.prevalence} may take time if you work on very large rasters. +} +\examples{ +# Create an example stack with two environmental variables +a <- matrix(rep(dnorm(1:100, 50, sd = 25)), + nrow = 100, ncol = 100, byrow = TRUE) +env <- stack(raster(a * dnorm(1:100, 50, sd = 25)), + raster(a * 1:100)) +names(env) <- c("variable1", "variable2") + +# Creation of the parameter list +parameters <- formatFunctions(variable1 = c(fun = 'dnorm', mean = 0.00012, + sd = 0.0001), + variable2 = c(fun = 'linearFun', a = 1, b = 0)) +sp1 <- generateSpFromFun(env, parameters, plot = FALSE) + +# Conversion into presence-absence with a threshold-based approach +convertToPA(sp1, PA.method = "threshold", beta = 0.2, plot = TRUE) +convertToPA(sp1, PA.method = "threshold", beta = "random", plot = TRUE) + +# Conversion into presence-absence with a probability-based approach +convertToPA(sp1, PA.method = "probability", beta = 0.4, + alpha = -0.05, plot = TRUE) +convertToPA(sp1, PA.method = "probability", beta = "random", + alpha = -0.1, plot = TRUE) + +# Conversion into presence-absence by choosing the prevalence +# Threshold method +convertToPA(sp1, PA.method = "threshold", + species.prevalence = 0.3, plot = TRUE) +# Probability method, with alpha provided +convertToPA(sp1, PA.method = "probability", alpha = -0.1, + species.prevalence = 0.2, plot = TRUE) +# Probability method, with beta provided +convertToPA(sp1, PA.method = "probability", beta = 0.5, + species.prevalence = 0.2, alpha = NULL, + plot = TRUE) + +# Plot the output Presence-Absence raster only +sp1 <- convertToPA(sp1, plot = FALSE) +plot(sp1$pa.raster) +} +\author{ +Boris Leroy \email{leroy.boris@gmail.com} + +with help from C. N. Meynard, C. Bellard & F. Courchamp +} +\references{ +Meynard C.N. & Kaplan D.M. 2013. Using virtual species to study species +distributions and model performance. +\emph{Journal of Biogeography} \bold{40}:1-8 + +Meynard C.N. & Kaplan D.M. 2011. The effect of a gradual response to the +environment on species distribution model performance. +\emph{Ecography} \bold{35}:499-509 +} + diff --git a/man/formatFunctions.Rd b/man/formatFunctions.Rd new file mode 100644 index 0000000..1da8a60 --- /dev/null +++ b/man/formatFunctions.Rd @@ -0,0 +1,74 @@ +% Generated by roxygen2 (4.0.2): do not edit by hand +\name{formatFunctions} +\alias{formatFunctions} +\title{Format and visualise functions used to generate virtual species with \code{\link{generateSpFromFun}}} +\usage{ +formatFunctions(x = NULL, rescale = TRUE, ...) +} +\arguments{ +\item{x}{NULL or a \code{RasterStack}. If you want to visualise the functions, +provide your \code{RasterStack} here.} + +\item{rescale}{\code{TRUE} or \code{FALSE}. If \code{TRUE}, individual response +plots are rescaled between 0 and 1.} + +\item{...}{the parameters to be formatted. See details.} +} +\description{ +This function is a helper function to simplify the formatting of functions +for generateSpFromFun. +} +\details{ +This function formats the \code{parameters} argument of \code{\link{generateSpFromFun}}. +For each environmental variable, provide a vector containing the function name, and its arguments. + + +For example, assume we want to generate a species responding to two environmental variables bio1 and bio2. +\itemize{ +\item{The response to bio1 is a normal response (\code{\link{dnorm}}), of mean 1 and standard deviation 0.5.} +\item{The response to bio2 is a linear response (\code{\link{linearFun}}), of slope (a) 2 and intercept (b) 5.} +} +The correct writing is: + +\code{formatFunctions( +bio1 = c(fun = "dnorm", mean = 1, sd = 0.5), +bio2 = c(fun = "linearFun", a = 2, b = 5))} +} +\section{Warning}{ + +Do not use 'x' as a name for your environmental variables. +} +\examples{ +my.parameters <- formatFunctions(variable1 = c(fun = 'dnorm', + mean = 0.00012, sd = 0.0001), + variable2 = c(fun = 'linearFun', a = 1, b = 0)) + + +my.parameters <- formatFunctions(bio1 = c(fun = "logisticFun", + alpha = -12.7, beta = 68), + bio2 = c(fun = "linearFun", + a = -0.03, b = 191.2), + bio3 = c(fun = "dnorm", + mean = 86.4, sd = 19.1), + bio4 = c(fun = "logisticFun", + alpha = 2198.5, beta = 11381.4)) +\dontrun{ +# An example using worldclim data +bio1.4 <- getData('worldclim', var='bio', res=10)[[1:4]] +my.parameters <- formatFunctions(x = bio1.4, + bio1 = c(fun = "logisticFun", + alpha = -12.7, beta = 68), + bio2 = c(fun = "linearFun", + a = -0.03, b = 191.2), + bio3 = c(fun = "dnorm", + mean = 86.4, sd = 19.1), + bio4 = c(fun = "logisticFun", + alpha = 2198.5, beta = 11381.4)) +} +} +\author{ +Boris Leroy \email{leroy.boris@gmail.com} + +with help from C. N. Meynard, C. Bellard & F. Courchamp +} + diff --git a/man/generateRandomSp.Rd b/man/generateRandomSp.Rd new file mode 100644 index 0000000..5e0a291 --- /dev/null +++ b/man/generateRandomSp.Rd @@ -0,0 +1,144 @@ +% Generated by roxygen2 (4.0.2): do not edit by hand +\name{generateRandomSp} +\alias{generateRandomSp} +\title{Generate a random virtual species distribution from environmental variables} +\usage{ +generateRandomSp(raster.stack, approach = "automatic", rescale = TRUE, + convert.to.PA = TRUE, relations = c("gaussian", "linear", "logistic", + "quadratic"), rescale.each.response = TRUE, realistic.sp = TRUE, + species.type = "multiplicative", niche.breadth = "any", + sample.points = FALSE, nb.points = 10000, PA.method = "probability", + alpha = -0.1, beta = "random", species.prevalence = NULL, plot = TRUE) +} +\arguments{ +\item{raster.stack}{a RasterStack object, in which each layer represent an environmental +variable.} + +\item{approach}{\code{"automatic"}, \code{"random"}, \code{"response"} +or \code{"pca"}. This parameters defines how species will be generated. +\code{"automatic"}: If less than 6 variables in \code{raster.stack}, a +response approach will be used, otherwise a pca approach will be used. +\code{"random"}: the approach will be randomly picked. Otherwise choose +\code{"response"} or \code{"pca"}. See details.} + +\item{rescale}{\code{TRUE} or \code{FALSE}. If \code{TRUE}, the final +probability of presence is rescaled between 0 and 1.} + +\item{convert.to.PA}{\code{TRUE} or \code{FALSE}. If \code{TRUE}, the +virtual species distribution will also be converted into Presence-Absence.} + +\item{relations}{[response approach] a vector containing the possible types of response function. +The implemented type of relations are \code{"gaussian"}, \code{"linear"}, +\code{"logistic"} and \code{"quadratic"}.} + +\item{rescale.each.response}{\code{TRUE} or \code{FALSE}. If \code{TRUE}, the individual responses to +each environmental variable are rescaled between 0 and 1} + +\item{realistic.sp}{[response approach] \code{TRUE} or \code{FALSE}. If +\code{TRUE}, the function will try to define responses that can form a viable +species. If \code{FALSE}, the responses will be randomly generated +(may result in environmental conditions that do not co-exist).} + +\item{species.type}{[response approach] \code{"additive"}, \code{"multiplicative"} or \code{"mixed"}. Defines +how the final probability of presence is calculated: if \code{"additive"}, responses to each +variable are summed; if \code{"multiplicative"}, responses are multiplicated; if \code{"mixed"} +the final probability will be a random combination of products and sums of individual responses. +See \code{\link{generateSpFromFun}}} + +\item{niche.breadth}{[pca approach] \code{"any"}, \code{"narrow"} or \code{"wide"}. This parameter +defines how tolerant is the species regarding environmental conditions by adjusting +the standard deviations of the gaussian functions. See \code{\link{generateSpFromPCA}}} + +\item{sample.points}{[pca approach] \code{TRUE} of \code{FALSE}. If you have a large +raster file then use this parameter to sample a number of points equal to +\code{nb.points}.} + +\item{nb.points}{[pca approach] a numeric value. Only useful if \code{sample.points = TRUE}. +The number of sampled points from the raster, to perform the PCA. A too small +value may not be representative of the environmental conditions in your raster.} + +\item{PA.method}{\code{"threshold"} or \code{"probability"}. If +\code{"threshold"}, then occurrence probabilities are simply converted into +presence-absence according to the threshold \code{beta}. If \code{"probability"}, then +probabilities are converted according to a logistic function of threshold +\code{beta} and slope \code{alpha}.} + +\item{alpha}{\code{NULL} or a negative numeric value. Only useful if +\code{PA.method = "probability"}. The value of \code{alpha} will +shape the logistic function transforming occurrences into presence-absences. +See \code{\link{logisticFun}} and examples therein for the choice of \code{alpha}} + +\item{beta}{\code{"random"}, a numeric value in the range of your +probabilities or \code{NULL}. This is the threshold of conversion into +presence-absence (= the inflexion point if \code{PA.method = "probability"}). +If \code{"random"}, a numeric value will be randomly generated within the range +of probabilities of occurrence. See \code{\link{convertToPA}}} + +\item{species.prevalence}{\code{NULL} or a numeric value between 0 and 1. +The species prevalence is the proportion of sites actually occupied by the +species. See \code{\link{convertToPA}}} + +\item{plot}{\code{TRUE} or \code{FALSE}. If \code{TRUE}, the generated virtual species will be plotted.} +} +\value{ +a \code{list} with 3 to 5 elements (depending if the conversion to presence-absence was performed): +\itemize{ +\item{\code{approach}: the approach used to generate the species, \emph{i.e.}, \code{"response"}} +\item{\code{details}: the details and parameters used to generate the species} +\item{\code{suitab.raster}: the virtual species distribution, as a Raster object containing the +environmental suitability)} +\item{\code{PA.conversion}: the parameters used to convert the suitability into presence-absence} +\item{\code{pa.raster}: the presence-absence map, as a Raster object containing 0 (absence) / 1 (presence) / NA} +} +The structure of the virtualspecies can object be seen using str() +} +\description{ +This function generates randomly a virtual species distribution. +} +\details{ +By default, the function will perform a probabilistic conversion into presence- +absence, with a randomly chosen beta threshold. If you want to custmose the +conversion parameters, you have to define \bold{two} of the three following parameters: +\itemize{ +\item{\code{beta}: the 'threshold' of the logistic function (i.e. the +inflexion point)} +\item{\code{alpha}: the slope of the logistic function} +\item{\code{species.prevalence}: the proportion of sites in which the species +occur} +} + +If you provide \code{beta} and \code{alpha}, the \code{species.prevalence} +is calculated immediately calculated after conversion into presence-absence. + +As explained in \code{\link{convertToPA}}, if you choose choose a precise +\code{species.prevalence}, it may not be possible to reach this particular +value because of the availability of environmental conditions. Several +runs may be necessary to reach the desired \code{species.prevalence}. +} +\examples{ +# Create an example stack with six environmental variables +a <- matrix(rep(dnorm(1:100, 50, sd = 25)), + nrow = 100, ncol = 100, byrow = TRUE) +env <- stack(raster(a * dnorm(1:100, 50, sd = 25)), + raster(a * 1:100), + raster(a * logisticFun(1:100, alpha = 10, beta = 70)), + raster(t(a)), + raster(exp(a)), + raster(log(a))) +names(env) <- paste("Var", 1:6, sep = "") + +# More than 6 variables: by default a PCA approach will be used +generateRandomSp(env) + +# Manually choosing a response approach +generateRandomSp(env, approach = "response") + +# Randomly choosing the approach +generateRandomSp(env, approach = "random") +} +\author{ +Boris Leroy \email{leroy.boris@gmail.com} + +with help from C. N. Meynard, C. Bellard & F. Courchamp +} + diff --git a/man/generateSpFromFun.Rd b/man/generateSpFromFun.Rd new file mode 100644 index 0000000..618b7a5 --- /dev/null +++ b/man/generateSpFromFun.Rd @@ -0,0 +1,137 @@ +% Generated by roxygen2 (4.0.2): do not edit by hand +\name{generateSpFromFun} +\alias{generateSpFromFun} +\title{Generate a virtual species distributions with responses to environmental variables} +\usage{ +generateSpFromFun(raster.stack, parameters, rescale = TRUE, + species.type = "multiplicative", formula = NULL, + rescale.each.response = TRUE, plot = FALSE) +} +\arguments{ +\item{raster.stack}{a RasterStack object, in which each layer represent an environmental +variable.} + +\item{parameters}{a list containing the functions of response of the species to +environmental variables with their parameters. See details.} + +\item{rescale}{\code{TRUE} or \code{FALSE}. If \code{TRUE}, the final probability of presence is rescaled between 0 and 1.} + +\item{species.type}{\code{"additive"}, \code{"multiplicative"} or \code{"mixed"}. Defines +how the final probability of presence is calculated: if \code{"additive"}, responses to each +variable are summed; if \code{"multiplicative"}, responses are multiplicated; if \code{"mixed"} +responses are both summed and multiplicated depending on argument \code{formula}} + +\item{formula}{\code{NULL} to create a random formula to calculate the final probability +of presence, or a character string of the form: \code{"layername1 + layername2 * +layername3 * etc."} to manually define it. Only used if \code{species.type} is set to +\code{"mixed"}} + +\item{rescale.each.response}{\code{TRUE} or \code{FALSE}. If \code{TRUE}, the individual responses to +each environmental variable are rescaled between 0 and 1 (see details).} + +\item{plot}{\code{TRUE} or \code{FALSE}. If \code{TRUE}, the generated virtual species will be plotted.} +} +\value{ +a \code{list} with 3 elements: +\itemize{ +\item{\code{approach}: the approach used to generate the species, \emph{i.e.}, \code{"response"}} +\item{\code{details}: the details and parameters used to generate the species} +\item{\code{suitab.raster}: the virtual species distribution, as a Raster object containing the +environmental suitability)} +} +#' The structure of the virtualspecies object can be seen using str() +} +\description{ +This function generates a virtual species distribution from a RasterStack of environmental +variables and a defined set of responses to each environmental parameter. +} +\details{ +This functions proceeds into several steps: +\enumerate{ +\item{The response to each environmental variable is calculated with the functions provided +in \code{parameters}. This results in a probability of presence for each variable.} +\item{If \code{rescale.each.response} is \code{TRUE}, each probability of presence is rescaled between 0 and 1.} +\item{The final probability of presence is calculated according to the chosen \code{species.type}.} +\item{If \code{rescale} is \code{TRUE}, the final probability of presence is rescaled between 0 and 1.} +} +The RasterStack containing environmental variables must have consistent names, +because they will be checked with the \code{parameters}. For example, they can be named +var1, var2, etc. Names can be checked and set with \code{names(my.stack)}. + +The \code{parameters} have to be carefully created, otherwise the function will not work: +\itemize{ +\item{Either see \code{\link{formatFunctions}} to easily create your list of parameters} +\item{Or create a \code{list} defined according to the following template: \cr +\code{list( + var1 = list(fun = 'fun1', args = list(arg1 = ..., arg2 = ..., etc.)), + var2 = list(fun = 'fun2', args = list(arg1 = ..., arg2 = ..., etc.)))}\cr +It is important to keep the same names in the parameters as in the stack of environmental +variables. Similarly, argument names must be identical to argument names in the associated +function (e.g., if you use \code{fun = 'dnorm'}, then args should look like \code{list(mean = 0, sd = 1)}). + +See the example section below for more examples.}} + + +Any response function that can be applied to the environmental variables can +be chosen here. Several functions are proposed in this package: +\code{\link{linearFun}}, \code{\link{logisticFun}} and \code{\link{quadraticFun}}. +Another classical example is the normal distribution: \code{\link[stats]{dnorm}}. +Ther users can also create and use their own functions. + + +If \code{rescale.each.response = TRUE}, then the probability response to each +variable will be normalised between 0 and 1 according to the following formula: +P.rescaled = (P - min(P)) / (max(P) - min (P)) +This rescaling has a strong impact on response functions, so users may prefer to +use \code{rescale.each.response = FALSE} and apply their own rescaling within +their response functions. +} +\examples{ +# Create an example stack with two environmental variables +a <- matrix(rep(dnorm(1:100, 50, sd = 25)), + nrow = 100, ncol = 100, byrow = TRUE) +env <- stack(raster(a * dnorm(1:100, 50, sd = 25)), + raster(a * 1:100)) +names(env) <- c("variable1", "variable2") +plot(env) # Illustration of the variables + +# Easy creation of the parameter list: +# see in real time the shape of the response functions +parameters <- formatFunctions(variable1 = c(fun = 'dnorm', mean = 1e-04, + sd = 1e-04), + variable2 = c(fun = 'linearFun', a = 1, b = 0)) + +# If you provide env, then you can see the shape of response functions: +parameters <- formatFunctions(x = env, + variable1 = c(fun = 'dnorm', mean = 1e-04, + sd = 1e-04), + variable2 = c(fun = 'linearFun', a = 1, b = 0)) + +# Generation of the virtual species +sp1 <- generateSpFromFun(env, parameters) +sp1 +par(mfrow = c(1, 1)) +plot(sp1) + + +# Manual creation of the parameter list +# Note that the variable names are the same as above +parameters <- list(variable1 = list(fun = 'dnorm', + args = list(mean = 0.00012, + sd = 0.0001)), + variable2 = list(fun = 'linearFun', + args = list(a = 1, b = 0))) +# Generation of the virtual species +sp1 <- generateSpFromFun(env, parameters, plot = TRUE) +sp1 +plot(sp1) +} +\author{ +Boris Leroy \email{leroy.boris@gmail.com} + +with help from C. N. Meynard, C. Bellard & F. Courchamp +} +\seealso{ +\code{\link{generateSpFromPCA}} to generate a virtual species with a PCA approach +} + diff --git a/man/generateSpFromPCA.Rd b/man/generateSpFromPCA.Rd new file mode 100644 index 0000000..dab2c75 --- /dev/null +++ b/man/generateSpFromPCA.Rd @@ -0,0 +1,142 @@ +% Generated by roxygen2 (4.0.2): do not edit by hand +\name{generateSpFromPCA} +\alias{generateSpFromPCA} +\title{Generate a virtual species distribution with a PCA of environmental variables} +\usage{ +generateSpFromPCA(raster.stack, rescale = TRUE, niche.breadth = "any", + means = NULL, sds = NULL, pca = NULL, remove.collinearity = FALSE, + multicollinearity.cutoff = 0.7, sample.points = FALSE, + nb.points = 10000, plot = TRUE) +} +\arguments{ +\item{raster.stack}{a RasterStack object, in which each layer represent an environmental +variable.} + +\item{rescale}{\code{TRUE} of \code{FALSE}. Should the output suitability raster be +rescaled between 0 and 1?} + +\item{niche.breadth}{\code{"any"}, \code{"narrow"} or \code{"wide"}. This parameter +defines how tolerant is the species regarding environmental conditions by adjusting +the standard deviations of the gaussian functions. See details.} + +\item{means}{a vector containing two numeric values. Will be used to define +the means of the gaussian response functions to the axes of the PCA.} + +\item{sds}{a vector containing two numeric values. Will be used to define +the standard deviations of the gaussian response functions to the axes of +the PCA.} + +\item{pca}{a \code{dudi.pca} object. You can provide a pca object that you +computed yourself with \code{\link[ade4]{dudi.pca}}} + +\item{remove.collinearity}{\code{TRUE} of \code{FALSE}. Can be set to \code{TRUE} +to remove groups of collinear variables (at the cutoff \code{multicollinearity.cutoff}) +in your dataset: only one variable per group will be kept. See \code{\link{removeCollinearity}} +for details.} + +\item{multicollinearity.cutoff}{a numeric value between 0 and 1. Only useful if +\code{remove.collinearity = TRUE}. The cutoff of collinearity above which +variables will be grouped together.} + +\item{sample.points}{\code{TRUE} of \code{FALSE}. If you have a large +raster file then use this parameter to sample a number of points equal to +\code{nb.points}.} + +\item{nb.points}{a numeric value. Only useful if \code{sample.points = TRUE}. +The number of sampled points from the raster, to perform the PCA. A too small +value may not be representative of the environmental conditions in your raster.} + +\item{plot}{\code{TRUE} or \code{FALSE}. If \code{TRUE}, the generated virtual species will be plotted.} +} +\value{ +a \code{list} with 3 elements: +\itemize{ +\item{\code{approach}: the approach used to generate the species, \emph{i.e.}, \code{"pca"}} +\item{\code{details}: the details and parameters used to generate the species} +\item{\code{suitab.raster}: the virtual species distribution, as a Raster object containing the +environmental suitability} +} +The structure of the virtualspecies object can be seen using str() +} +\description{ +This functions generates a virtual species distribution by computing a +PCA among environmental variables, and simulating the response of the species +along the two first axes of the PCA. The response to axes of the PCA is +determined with gaussian functions. +} +\details{ +This function proceeds in 3 steps: +\enumerate{ +\item{A PCA of environmental conditions is generated} +\item{Gaussian responses to the first two axes are computed} +\item{These responses are multiplied to obtain the final environmental suitability}} + +The shape of gaussian responses can be determined manually, by defining both +\code{means} and \code{sds}, or randomly. The random generation is constrained +by the argument \code{niche.breadth}, which controls the range of possible +standard deviation values. This range of values is based on +a fraction of the axis: +\itemize{ +\item{\code{"any"}: the standard deviations can have values from 1/100 to 1/2 +of the axes' span} +\item{\code{"narrow"}: the standard deviations can have values from 1/100 to 1/10 +of the axes' span} +\item{\code{"wide"}: the standard deviations can have values from 1/10 to 1/2 +of the axes' span} +} +} +\note{ +To perform the PCA, the function has to transform the raster into a matrix. +This may not be feasible if the raster is too large for the computer's memory. +In this case, you should perform the PCA on a sample of your raster with +set \code{sample.points = TRUE} and choose the number of points to sample with +\code{nb.points}. +} +\examples{ +# Create an example stack with four environmental variables +a <- matrix(rep(dnorm(1:100, 50, sd = 25)), + nrow = 100, ncol = 100, byrow = TRUE) +env <- stack(raster(a * dnorm(1:100, 50, sd = 25)), + raster(a * 1:100), + raster(a * logisticFun(1:100, alpha = 10, beta = 70)), + raster(t(a))) +names(env) <- c("var1", "var2", "var3", "var4") +plot(env) # Illustration of the variables + + + + + +# Generating a species with the PCA + +generateSpFromPCA(raster.stack = env) + +# The top part of the plot shows the PCA and the response functions along +# the two axes. +# The bottom part shows the probabilities of occurrence of the virtual +# species. + + + + + +# Defining manually the response to axes + +generateSpFromPCA(raster.stack = env, + means = c(-2, 0), + sds = c(0.6, 1.5)) + +# This species can be seen as occupying intermediate altitude ranges of a +# conic mountain. +} +\author{ +Boris Leroy \email{leroy.boris@gmail.com} + +with help from C. N. Meynard, C. Bellard & F. Courchamp +} +\seealso{ +\code{\link{generateSpFromFun}} to generate a virtual species with +the responses to each environmental variables. +#' +} + diff --git a/man/limitDistribution.Rd b/man/limitDistribution.Rd new file mode 100644 index 0000000..cc574c6 --- /dev/null +++ b/man/limitDistribution.Rd @@ -0,0 +1,127 @@ +% Generated by roxygen2 (4.0.2): do not edit by hand +\name{limitDistribution} +\alias{limitDistribution} +\title{Limit a virtual species distribution to a defined area} +\usage{ +limitDistribution(x, geographical.limit = "extent", area = NULL, + plot = TRUE) +} +\arguments{ +\item{x}{a \code{rasterLayer} object composed of 0, 1 and NA, or the output list from +\code{\link{generateSpFromFun}}, \code{\link{generateSpFromPCA}} +or \code{\link{generateRandomSp}}} + +\item{geographical.limit}{\code{"country"}, \code{"region"}, +\code{"continent"}, \code{"polygon"} or \code{"extent"}. The method used +to limit the distribution range: see details.} + +\item{area}{\code{NULL}, a character string, a \code{polygon} or an \code{extent} object. +The area in which the distribution range will be limited: see details. If \code{NULL} +and \code{geographical.limit = "extent"}, then you will be asked to draw an +extent on the map.} + +\item{plot}{\code{TRUE} or \code{FALSE}. If \code{TRUE}, the resulting limited +distribution will be plotted.} +} +\value{ +a \code{list} containing 7 elements: +\itemize{ +\item{\code{approach}: the approach used to generate the species, \emph{i.e.}, \code{"response"}} +\item{\code{details}: the details and parameters used to generate the species} +\item{\code{suitab.raster}: the virtual species distribution, as a Raster object containing the +environmental suitability)} +\item{\code{PA.conversion}: the parameters used to convert the suitability into presence-absence} +\item{\code{pa.raster}: the presence-absence map, as a Raster object containing 0 (absence) / 1 (presence) / NA} +\item{\code{geographical.limit}: the method used to +limit the distribution and the area in which the distribution is restricted} +\item{\code{occupied.area}: the area occupied by the virtual species as a +Raster of presence-absence} +} +The structure of the virtualspecies object can be seen using str() +a \code{list} containing: +\itemize{ +\item{\code{geographical.limit}: a list with two elements, the method used to +limit the distribution and the area in which the distribution is restricted} +\item{\code{occupied.area}: the area occupied by the virtual species as a +Raster of presence-absence} +} +} +\description{ +This function is designed to limit species distributions to a subsample of +their total distribution range. It will thus generate a species which is not +at the equilibrium with its environment (i.e., which did not occupy the full +range of suitable environmental conditions). + +This function basically takes any type of raster and will limit values above +0 to areas where the species is allowed to disperse. +} +\details{ +\bold{How the function works:} + +The function will remove occurrences of the species outside the chosen area: +\itemize{ +\item{NA are kept unchanged} +\item{0 are kept unchanged} +\item{values > 0 within the limits of \code{area} are kept unchanged} +\item{values > 0 outside the limits of \code{area} are set to 0} +} + + +\bold{How to define the area in which the range is limited:} + +You can choose to limit the distribution range of the species to: +\enumerate{ +\item{a particular country, region or continent (assuming your raster has +the WGS84 projection): + +Set the argument +\code{geographical.limit} to \code{"country"}, \code{"region"} or +\code{"continent"}, and provide the name(s) of the associated countries, +regions or continents to \code{area} (see examples). + +List of possible \code{area} names: +\itemize{ +\item{Countries: type \code{levels(getMap()@data$SOVEREIGNT)} in the console} +\item{Regions: "Africa", "Antarctica", "Asia", "Australia", "Europe", +"North America", "South America"} +\item{Continents: "Africa", "Antarctica", "Australia", "Eurasia", +"North America", "South America"}} +} +\item{a polygon: + +Set \code{geographical.limit} to \code{"polygon"}, and provide your +polygon to \code{area}. +} +\item{an extent object: + +Set \code{geographical.limit} to \code{"extent"}, and either provide your +extent object to \code{area}, or leave it \code{NULL} to draw an extent on +the map.} +} +} +\examples{ +# Create an example stack with six environmental variables +a <- matrix(rep(dnorm(1:100, 50, sd = 25)), + nrow = 100, ncol = 100, byrow = TRUE) +env <- stack(raster(a * dnorm(1:100, 50, sd = 25)), + raster(a * 1:100), + raster(a * logisticFun(1:100, alpha = 10, beta = 70)), + raster(t(a)), + raster(exp(a)), + raster(log(a))) +names(env) <- paste("Var", 1:6, sep = "") + +# More than 6 variables: by default a PCA approach will be used +sp <- generateRandomSp(env) + +# limiting the distribution to a specific extent +limit <- extent(0.5, 0.7, 0.6, 0.8) + +limitDistribution(sp, area = limit) +} +\author{ +Boris Leroy \email{leroy.boris@gmail.com} + +with help from C. N. Meynard, C. Bellard & F. Courchamp +} + diff --git a/man/linearFun.Rd b/man/linearFun.Rd new file mode 100644 index 0000000..8910971 --- /dev/null +++ b/man/linearFun.Rd @@ -0,0 +1,35 @@ +% Generated by roxygen2 (4.0.2): do not edit by hand +\name{linearFun} +\alias{linearFun} +\title{Linear function} +\usage{ +linearFun(x, a, b) +} +\arguments{ +\item{x}{a numeric value or vector} + +\item{a}{a numeric value or vector} + +\item{b}{a numeric value or vector} +} +\value{ +a numeric value or vector resulting from the function +} +\description{ +A simple linear function of the form +\deqn{ax+b}{a*x+b} +} +\examples{ +x <- 1:100 +y <- linearFun(x, a = 0.5, b = 0) +plot(y ~ x, type = "l") +} +\author{ +Boris Leroy \email{leroy.boris@gmail.com} + +Maintainer: Boris Leroy \email{leroy.boris@gmail.com} +} +\seealso{ +\code{\link{logisticFun}}, \code{\link{quadraticFun}} +} + diff --git a/man/logisticFun.Rd b/man/logisticFun.Rd new file mode 100644 index 0000000..9a7c19d --- /dev/null +++ b/man/logisticFun.Rd @@ -0,0 +1,59 @@ +% Generated by roxygen2 (4.0.2): do not edit by hand +\name{logisticFun} +\alias{logisticFun} +\title{Logistic function} +\usage{ +logisticFun(x, alpha, beta) +} +\arguments{ +\item{x}{a numeric value or vector} + +\item{alpha}{a numeric value or vector} + +\item{beta}{a numeric value or vector} +} +\value{ +a numeric value or vector resulting from the function +} +\description{ +A simple logistic function of the form +\deqn{\frac{1}{{1 + e^{\frac{x - \beta}{\alpha}}}}}{ +1 / (1 + exp((x - \beta)/\alpha))} +} +\details{ +The value of \code{beta} determines the 'threshold' of the logistic curve +(i.e. the inflexion point). + +The value of \code{alpha} determines the slope of the curve (see examples): +\itemize{ +\item{\code{alpha} very close to 0 will result in a threshold-like response.} +\item{Values of \code{alpha} with the same order of magnitude as the range of +\code{x} (e.g., the range of\code{x} / 10) will result in a +logistic function.} +\item{\code{alpha} very far from 0 will result in a linear function.} +} +} +\examples{ +x <- 1:100 +y <- logisticFun(x, alpha = -10, b = 50) +plot(y ~ x, type = "l") + +# The effect of alpha: +y1 <- logisticFun(x, alpha = -0.01, b = 50) +y2 <- logisticFun(x, alpha = -10, b = 50) +y3 <- logisticFun(x, alpha = -1000, b = 50) + +par(mfrow = c(1, 3)) +plot(y1 ~ x, type = "l", main = expression(alpha \%->\% 0)) +plot(y2 ~ x, type = "l", main = expression(alpha \%~~\% range(x)/10)) +plot(y3 ~ x, type = "l", main = expression(alpha \%->\% infinity)) +} +\author{ +Boris Leroy \email{leroy.boris@gmail.com} + +Maintainer: Boris Leroy \email{leroy.boris@gmail.com} +} +\seealso{ +\code{\link{linearFun}}, \code{\link{quadraticFun}} +} + diff --git a/man/plotResponse.Rd b/man/plotResponse.Rd new file mode 100644 index 0000000..716ca7e --- /dev/null +++ b/man/plotResponse.Rd @@ -0,0 +1,93 @@ +% Generated by roxygen2 (4.0.2): do not edit by hand +\name{plotResponse} +\alias{plotResponse} +\title{Visualise the response of the virtual species to environmental variables} +\usage{ +plotResponse(x, parameters = NULL, approach = NULL, rescale = TRUE, ...) +} +\arguments{ +\item{x}{the output from \code{\link{generateSpFromFun}}, \code{\link{generateSpFromPCA}}, +\code{\link{generateRandomSp}}, or +a raster layer/stack of environmental variables (see details for the latter).} + +\item{parameters}{in case of manually defined response functions, a list +containing the associated parameters. See details.} + +\item{approach}{in case of manually defined response functions, the chosen +approach: either \code{"response"} for a per-variable response approach, or +\code{"pca"} for a PCA approach.} + +\item{rescale}{\code{TRUE} or \code{FALSE}. If \code{TRUE}, individual response +plots are rescaled between 0 and 1.} + +\item{...}{further arguments to be passed to \code{plot}. See +\code{\link[graphics]{plot}} and \code{\link[graphics]{par}}.} +} +\description{ +This function plots the relationships between the virtual species and the environmental variables. +It requires either the output from \code{\link{generateSpFromFun}}, \code{\link{generateSpFromPCA}}, +\code{\link{generateRandomSp}}, +or a manually defined set of environmental variables and response functions. +} +\details{ +If you provide the output from \code{\link{generateSpFromFun}}, \code{\link{generateSpFromPCA}} or +\code{\link{generateRandomSp}} +then the function will automatically make the appropriate plots. + +Otherwise, you can provide a raster layer/stack of environmental variables to + \code{x} and a list of functions to \code{parameters} to perform the plot. +In that case, you have to specifiy the \code{approach}: \code{"reponse"} or +\code{"PCA"}: +\itemize{ +\item{if \code{approach = "response"}: Provide to \code{parameters} a +\code{list} exactly as defined in \code{\link{generateSpFromFun}}:\cr +\code{list( + var1 = list(fun = 'fun1', args = list(arg1 = ..., arg2 = ..., etc.)), + var2 = list(fun = 'fun2', args = list(arg1 = ..., arg2 = ..., etc.)))}\cr + +} +\item{if \code{approach = "PCA"}: Provide to \code{parameters} a +\code{list} containing the following elements: +\itemize{ +\item{\code{pca}: a \code{dudi.pca} object computed with +\code{\link[ade4]{dudi.pca}}} +\item{\code{means}: a vector containing two numeric values. Will be used to define +the means of the gaussian response functions to the axes of the PCA.} +\item{\code{sds} a vector containing two numeric values. Will be used to define +the standard deviations of the gaussian response functions to the axes of +the PCA.}} +} +} +} +\examples{ +# Create an example stack with four environmental variables +a <- matrix(rep(dnorm(1:100, 50, sd = 25)), + nrow = 100, ncol = 100, byrow = TRUE) +env <- stack(raster(a * dnorm(1:100, 50, sd = 25)), + raster(a * 1:100), + raster(a * logisticFun(1:100, alpha = 10, beta = 70)), + raster(t(a))) +names(env) <- c("var1", "var2", "var3", "var4") + +# Per-variable response approach: +parameters <- formatFunctions(var1 = c(fun = 'dnorm', mean = 0.00012, + sd = 0.0001), + var2 = c(fun = 'linearFun', a = 1, b = 0), + var3 = c(fun = 'quadraticFun', a = -20, b = 0.2, + c = 0), + var4 = c(fun = 'logisticFun', alpha = -0.001, + beta = 0.002)) +sp1 <- generateSpFromFun(env, parameters, plot = TRUE) +plotResponse(sp1) + +# PCA approach: +sp2 <- generateSpFromPCA(env, plot = FALSE) +par(mfrow = c(1, 1)) +plotResponse(sp2) +} +\author{ +Boris Leroy \email{leroy.boris@gmail.com} + +with help from C. N. Meynard, C. Bellard & F. Courchamp +} + diff --git a/man/quadraticFun.Rd b/man/quadraticFun.Rd new file mode 100644 index 0000000..fedd6c5 --- /dev/null +++ b/man/quadraticFun.Rd @@ -0,0 +1,38 @@ +% Generated by roxygen2 (4.0.2): do not edit by hand +\name{quadraticFun} +\alias{quadraticFun} +\title{Quadratic function} +\usage{ +quadraticFun(x, a, b, c) +} +\arguments{ +\item{x}{a numeric value or vector} + +\item{a}{a numeric value or vector} + +\item{b}{a numeric value or vector} + +\item{c}{a numeric value or vector} +} +\value{ +a numeric value or vector resulting from the function +} +\description{ +A simple quadratic function of the form +\deqn{ax^2+bx+c}{ +a*x^2+b*x+c} +} +\examples{ +x <- 1:100 +y <- quadraticFun(x, a = 2, b = 2, c = 3) +plot(y ~ x, type = "l") +} +\author{ +Boris Leroy \email{leroy.boris@gmail.com} + +Maintainer: Boris Leroy \email{leroy.boris@gmail.com} +} +\seealso{ +\code{\link{linearFun}}, \code{\link{quadraticFun}} +} + diff --git a/man/removeCollinearity.Rd b/man/removeCollinearity.Rd new file mode 100644 index 0000000..aa8cee2 --- /dev/null +++ b/man/removeCollinearity.Rd @@ -0,0 +1,88 @@ +% Generated by roxygen2 (4.0.2): do not edit by hand +\name{removeCollinearity} +\alias{removeCollinearity} +\title{Remove collinearity among variables of a a raster stack} +\usage{ +removeCollinearity(raster.stack, multicollinearity.cutoff = 0.7, + select.variables = FALSE, sample.points = FALSE, nb.points = 10000, + plot = FALSE) +} +\arguments{ +\item{raster.stack}{a RasterStack object, in which each layer represent an environmental +variable.} + +\item{multicollinearity.cutoff}{a numeric value corresponding to the cutoff +of correlation above which to group variables.} + +\item{select.variables}{\code{TRUE} or \code{FALSE}. If \code{TRUE}, then the +function will choose one variable among each group to return a vector of +non correlated variables (see details). If \code{FALSE}, the function will return a list +containing the groups of correlated variables.} + +\item{sample.points}{\code{TRUE} or \code{FALSE}. If you have a large +raster file then use this parameter to sample a number of points equal to +\code{nb.points}.} + +\item{nb.points}{a numeric value. Only useful if \code{sample.points = TRUE}. +The number of sampled points from the raster, to perform the PCA. A too small +value may not be representative of the environmental conditions in your raster.} + +\item{plot}{\code{TRUE} or \code{FALSE}. If \code{TRUE}, the hierarchical +ascendant classification used to group variables will be plotted.} +} +\value{ +a vector of non correlated variables, or a list where each element is a +group of non correlated variables. +} +\description{ +This functions analyses the correlation among variables of the provideded +stack of environmental variables (using Pearson's R), and can return a +vector containing names of variables that are not intercorrelated, or a list +containing grouping variables according to their degree of collinearity. +} +\details{ +This function uses the Pearson's correlation coefficient to analyse +correlation among variables. This coefficient is then used to compute a +distance matrix, which in turn is used it compute an ascendant hierarchical +classification, with the '\emph{complete}' method (see +\code{\link[stats]{hclust}}). If at least one correlation above the \code{ +multicollinearity.cutoff} is detected, then the variables will be grouped +according to their degree of correlation. + +If \code{select.variables = TRUE}, then the function will return a vector +containing variables that are not intercorrelated. +The variables not correlated to any other variables are automatically included +in this vector. For each group of intercorrelated variables, one variable will +be randomly chosen and included in this vector. +} +\examples{ +# Create an example stack with six environmental variables +a <- matrix(rep(dnorm(1:100, 50, sd = 25)), + nrow = 100, ncol = 100, byrow = TRUE) +env <- stack(raster(a * dnorm(1:100, 50, sd = 25)), + raster(a * 1:100), + raster(a * logisticFun(1:100, alpha = 10, beta = 70)), + raster(t(a)), + raster(exp(a)), + raster(log(a))) +names(env) <- paste("Var", 1:6, sep = "") + +# Defaults settings: cutoff at 0.7 +removeCollinearity(env, plot = TRUE) + +# Changing cutoff to 0.5 +removeCollinearity(env, plot = TRUE, multicollinearity.cutoff = 0.5) + +# Automatic selection of variables not intercorrelated +removeCollinearity(env, plot = TRUE, select.variables = TRUE) + +# Assuming a very large raster file: selecting a subset of points +removeCollinearity(env, plot = TRUE, select.variables = TRUE, + sample.points = TRUE, nb.points = 5000) +} +\author{ +Boris Leroy \email{leroy.boris@gmail.com} + +with help from C. N. Meynard, C. Bellard & F. Courchamp +} + diff --git a/man/sampleOccurrences.Rd b/man/sampleOccurrences.Rd new file mode 100644 index 0000000..40399a9 --- /dev/null +++ b/man/sampleOccurrences.Rd @@ -0,0 +1,218 @@ +% Generated by roxygen2 (4.0.2): do not edit by hand +\name{sampleOccurrences} +\alias{sampleOccurrences} +\title{Sample occurrences in a virtual species distribution} +\usage{ +sampleOccurrences(x, n, type = "presence only", sampling.area = NULL, + detection.probability = 1, correct.by.suitability = FALSE, + error.probability = 0, bias = "no.bias", bias.strength = 50, + bias.area = NULL, weights = NULL, sample.prevalence = NULL, + plot = TRUE) +} +\arguments{ +\item{x}{a \code{rasterLayer} object or the output list from +\code{generateSpFromFun}, \code{generateSpFromPCA}, \code{generateRandomSp}, \code{convertToPA} +or \code{limitDistribution} +The raster must contain values of 0 or 1 (or NA).} + +\item{n}{an integer. The number of occurrence points to sample.} + +\item{type}{\code{"presence only"} or \code{"presence-absence"}. The type of +occurrence points to sample.} + +\item{sampling.area}{a character string, a \code{polygon} or an \code{extent} object. +The area in which the sampling will take place. See details.} + +\item{detection.probability}{a numeric value between 0 and 1, corresponding to the +probability of detection of the species. See details.} + +\item{correct.by.suitability}{\code{TRUE} or \code{FALSE}. If \code{TRUE}, then +the probability of detection will be weighted by the suitability, such that +cells with lower suitabilities will further decrease the chance that the species +is detected when sampled.} + +\item{error.probability}{\code{TRUE} or \code{FALSE}. Only useful if +\code{type = "presence-absence"}. Probability to attribute an erroneous presence +in cells where the species is absent.} + +\item{bias}{\code{"no.bias"}, \code{"country"}, \code{"region"}, +\code{"extent"}, \code{"polygon"} or \code{"manual"}. The method used to +generate a sampling bias: see details.} + +\item{bias.strength}{a positive numeric value. The strength of the bias to be applied +in \code{area} (as a multiplier). Above 1, \code{area} will be oversampled. Below 1, \code{area} +will be undersampled.} + +\item{bias.area}{\code{NULL}, a character string, a \code{polygon} or an \code{extent} object. +The area in which the sampling will be biased: see details. If \code{NULL} +and \code{bias = "extent"}, then you will be asked to draw an +extent on the map.} + +\item{weights}{\code{NULL} or a raster layer. Only used if \code{bias = "manual"}. +The raster of bias weights to be applied to the sampling of occurrences. +Higher weights mean a higher probability of sampling.} + +\item{sample.prevalence}{\code{NULL} or a numeric value between 0 and 1. Only useful if +\code{type = "presence-absence"}. Defines the sample prevalence, i.e. the proportion of presences +sampled. Note that the probabilities of detection and error are applied AFTER this parameter, +so the final sample prevalence may not different if you apply probabilities of detection and/or error} + +\item{plot}{\code{TRUE} or \code{FALSE}. If \code{TRUE}, the sampled occurrence +points will be plotted.} +} +\value{ +a \code{list} with 3 (unbiased sampling) to 4 (biased sampling) elements: +\itemize{ +\item{\code{sample.points}: the data.frame containing the coordinates of +samples, the real presence-absences (or presence-only) and the sampled presence- +absences} +\item{\code{detection.probability}: the chosen probability of detection of +the virtual species} +\item{\code{error.probability}: the chosen probability to assign presence +in cells where the species is absent} +\item{\code{bias}: if a bias was chosen, then the type of bias and the +associated \code{area} will be included.} +} +} +\description{ +This function samples presences within a species distribution, either +randomly or with a sampling bias. The sampling bias can be defined manually +or with a set of pre-defined biases. +} +\details{ +\bold{How the function works:} + +The function randomly selects \code{n} cells in which samples occur. If a \code{bias} +is chosen, then the selection of these cells will be biased according to the type and +strength of bias chosen. If the sampling is of \code{type "presence only"}, then only +cells where the species is present will be chosen. If the sampling is of +\code{type "presence-absence"}, then all non-NA cells can be chosen. + +The function then samples the species inside the chosen cells. In cells +where the species is present the species will always be sampled unless +the parameter \code{detection.probability} is lower than 1. In that case the +species will be sampled with the associated probability of detection. + +In cells where the species is absent (in case of a \code{"presence-absence"} +sampling), the function will always assign absence unless \code{error.probability} +is greater than 1. In that case, the species can be found present with the +associated probability of error. Note that this step happens AFTER the detection +step. Hence, in cells where the species is present but not detected, it can +still be sampled due to a sampling error. + +\bold{How to restrict the sampling area:} + +Use the argument \code{sampling.area}: +\itemize{ +\item{Provide the name (s) (or a combination of names) of country(ies), region(s) or continent(s). +Examples: +\itemize{ +\item{\code{sampling.area = "Africa"}} +\item{\code{sampling.area = c("Africa", "North America", "France")}} +}} +\item{Provide a polygon (\code{SpatialPolygons} or \code{SpatialPolygonsDataFrame} +of package \code{sp})} +\item{Provide an \code{extent} object} +} + +\bold{How the sampling bias works:} + +The argument \code{bias.strength} indicates the strength of the bias. +For example, a value of 50 will result in 50 times more samples within the + \code{bias.area} than outside. +Conversely, a value of 0.5 will result in half less samples within the +\code{bias.area} than outside. + +\bold{How to choose where the sampling is biased:} + +You can choose to bias the sampling in: +\enumerate{ +\item{a particular country, region or continent (assuming your raster has +the WGS84 projection): + +Set the argument +\code{bias} to \code{"country"}, \code{"region"} or +\code{"continent"}, and provide the name(s) of the associated countries, +regions or continents to \code{bias.area} (see examples). + +List of possible \code{bias.area} names: +\itemize{ +\item{Countries: type \code{levels(getMap()@data$SOVEREIGNT)} in the console} +\item{Regions: "Africa", "Antarctica", "Asia", "Australia", "Europe", +"North America", "South America"} +\item{Continents: "Africa", "Antarctica", "Australia", "Eurasia", +"North America", "South America"}} +} +\item{a polygon: + +Set \code{bias} to \code{"polygon"}, and provide your +polygon to \code{area}. +} +\item{an extent object: + +Set \code{bias} to \code{"extent"}, and either provide your +extent object to \code{bias.area}, or leave it \code{NULL} to draw an extent on +the map.} +} + +Otherwise you can define manually your sampling bias, \emph{e.g.} to create +biases along roads. In that case you have to provide to \code{weights} a raster layer in which +each cell contains the probability to be sampled. +} +\examples{ +# Create an example stack with six environmental variables +a <- matrix(rep(dnorm(1:100, 50, sd = 25)), + nrow = 100, ncol = 100, byrow = TRUE) +env <- stack(raster(a * dnorm(1:100, 50, sd = 25)), + raster(a * 1:100), + raster(a * logisticFun(1:100, alpha = 10, beta = 70)), + raster(t(a)), + raster(exp(a)), + raster(log(a))) +names(env) <- paste("Var", 1:6, sep = "") + +# More than 6 variables: by default a PCA approach will be used +sp <- generateRandomSp(env, niche.breadth = "wide") + +# Sampling of 25 presences +sampleOccurrences(sp, n = 25) + +# Sampling of 30 presences and absebces +sampleOccurrences(sp, n = 30, type = "presence-absence") + +# Reducing of the probability of detection +sampleOccurrences(sp, n = 30, type = "presence-absence", + detection.probability = 0.5) + +# Further reducing in relation to environmental suitability +sampleOccurrences(sp, n = 30, type = "presence-absence", + detection.probability = 0.5, + correct.by.suitability = TRUE) + +# Creating sampling errors (far too much) +sampleOccurrences(sp, n = 30, type = "presence-absence", + error.probability = 0.5) + +# Introducing a sampling bias (oversampling) +biased.area <- extent(0.5, 0.7, 0.6, 0.8) +sampleOccurrences(sp, n = 50, type = "presence-absence", + bias = "extent", + bias.area = biased.area) +# Showing the area in which the sampling is biased +plot(biased.area, add = TRUE) + +# Introducing a sampling bias (no sampling at all in the chosen area) +biased.area <- extent(0.5, 0.7, 0.6, 0.8) +sampleOccurrences(sp, n = 50, type = "presence-absence", + bias = "extent", + bias.strength = 0, + bias.area = biased.area) +# Showing the area in which the sampling is biased +plot(biased.area, add = TRUE) +} +\author{ +Boris Leroy \email{leroy.boris@gmail.com} + +with help from C. N. Meynard, C. Bellard & F. Courchamp +} + diff --git a/man/synchroniseNA.Rd b/man/synchroniseNA.Rd new file mode 100644 index 0000000..aef1a84 --- /dev/null +++ b/man/synchroniseNA.Rd @@ -0,0 +1,40 @@ +% Generated by roxygen2 (4.0.2): do not edit by hand +\name{synchroniseNA} +\alias{synchroniseNA} +\title{Synchronise NA values among layers of a stack} +\usage{ +synchroniseNA(x) +} +\arguments{ +\item{x}{a raster stack object which needs to be synchronised.} +} +\description{ +This function ensures that cells containing NAs are the same among all the +layers of a raster stack, i.e.that for any given pixel of the stack, if one layer has a NA, then +all layers should be set to NA for that pixel. +} +\details{ +This function can do that in two different ways; if your computer has enough RAM a fast way will be +used; otherwise a slower but memory-safe way will be used. +} +\examples{ +# Creation of a stack with different NAs across layers +m <- matrix(nr = 10, nc = 10, 1:100) +r1 <- raster(m) +r2 <- raster(m) +r1[sample(1:ncell(r1), 20)] <- NA +r2[sample(1:ncell(r2), 20)] <- NA +s <- stack(r1, r2) + + +# Effect of the synchroniseNA() function +plot(s) # Not yet synchronised +s <- synchroniseNA(s) +plot(s) # Synchronised +} +\author{ +Boris Leroy \email{leroy.boris@gmail.com} + +with help from C. N. Meynard, C. Bellard & F. Courchamp +} + diff --git a/man/virtualspecies-package.Rd b/man/virtualspecies-package.Rd new file mode 100644 index 0000000..2e38f19 --- /dev/null +++ b/man/virtualspecies-package.Rd @@ -0,0 +1,67 @@ +% Generated by roxygen2 (4.0.2): do not edit by hand +\docType{package} +\name{virtualspecies-package} +\alias{virtualspecies-package} +\title{Generation of virtual species} +\description{ +This package allows generating virtual species distributions, for example +for testing species distribution modelling protocols. For a complete tutorial, +see http://borisleroy.com/virtualspecies +} +\details{ +The process of generating a virtual species distribution is divided into +four major steps. +\enumerate{ +\item{Generate a virtual species distributions from environmental variables. +This can be done by +\itemize{ +\item{defining the response functions to each environmental +variables with \code{\link{generateSpFromFun}}} +\item{computing a PCA among +environmental variables, and simulating the response of the species along +the two first axes of the PCA with \code{\link{generateSpFromPCA}}}} +This step can be entirely randomised with \code{\link{generateRandomSp}} +} +\item{Convert the virtual species distribution into presence-absence, with +\code{\link{convertToPA}}} +\item{Facultatively, introduce a distribution bias with +\code{\link{limitDistribution}}} +\item{Sample occurrence points (presence only or presence-absence) inside the +virtual species distribution, either randomly or with biases, with +\code{\link{sampleOccurrences}}} +} + +There are other useful functions in the package: +\itemize{ +\item{\code{\link{formatFunctions}}: this is a helper function to format and +illustrate the response functions as a correct input for \code{\link{generateSpFromFun}}} +\item{\code{\link{plotResponse}}: to visualise the species-environment relationship +of the virtual species} +\item{\code{\link{removeCollinearity}}: this function can be used to remove +collinearity among variables of a stack by selecting a subset of +non-intercorrelated variables} +\item{\code{\link{synchroniseNA}}: this function can be used to synchronise +NA values among layers of a stack} +} + + + + +This packages makes use of different other packages: +\itemize{ +\item{This package makes extensive use of the package \code{\link{raster}} to obtain spatialised +environmental variables, and produce spatialised virtual species distributions.} +\item{\code{\link{dismo}} is used to sample occurrence data from the generated virtual species.} +\item{\code{\link{ade4}} is used to generate species with a PCA approach.} +\item{\code{\link{rworldmap}} is used to obtain free world shapefiles, in order to create +dispersal limitations and sampling biases.} +} +} +\author{ +Boris Leroy \email{leroy.boris@gmail.com} + +with help from C. N. Meynard, C. Bellard & F. Courchamp + +Maintainer: Boris Leroy \email{leroy.boris@gmail.com} +} +