-
Notifications
You must be signed in to change notification settings - Fork 21
/
bm_SampleFactorLevels.R
250 lines (240 loc) · 10.5 KB
/
bm_SampleFactorLevels.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
###################################################################################################
##' @name bm_SampleFactorLevels
##' @aliases bm_SampleFactorLevels
##' @aliases bm_SampleFactorLevels.raster
##' @aliases bm_SampleFactorLevels.data.frame
##' @author Damien Georges
##'
##' @title Sample all levels of a factorial variable
##'
##' @description This internal \pkg{biomod2} function allows the user to sample all levels of all
##' the factorial variables contained in a \code{data.frame} or \code{\link[terra:rast]{SpatRaster}}
##' object.
##'
##' @param expl.var a \code{data.frame} or \code{\link[terra:rast]{SpatRaster}}
##' object containing the explanatory variables (in columns or layers)
##' @param mask.out a \code{data.frame} or \code{\link[terra:rast]{SpatRaster}}
##' object containing the area that has already been sampled (\emph{factor
##' levels within this mask will not be sampled})
##' @param mask.in a \code{data.frame} or \code{\link[terra:rast]{SpatRaster}}
##' object containing areas where factor levels are to be sampled in priority.
##' \emph{Note that if after having explored these masks, some factor levels
##' remain unsampled, they will be sampled in the reference input object \code{expl.var}.}
##'
##'
##' @return
##'
##' A \code{vector} of \code{numeric} values corresponding to either row (\code{data.frame}) or
##' cell (\code{\link[terra:rast]{SpatRaster}}) numbers, each refering to a single level of a
##' single factorial variable.
##'
##' In case no factorial variable is found in the input object, \code{NULL} is returned.
##'
##'
##' @details
##'
##' The \code{expl.var}, \code{mask.out} and \code{mask.in} parameters must be coherent in terms of
##' dimensions :
##' \itemize{
##' \item same number of rows for \code{data.frame} objects
##' \item same resolution, projection system and number of cells for \code{\link[terra:rast]{SpatRaster}} objects
##' \cr \cr
##' }
##'
##' If \code{mask.in} contains several columns (\code{data.frame}) or layers
##' (\code{\link[terra:rast]{SpatRaster}}), then their order matters :
##' they will be considered successively to sample missing factor levels. \cr \cr
##'
##' \itemize{
##' \item Values in \code{data.frame} will be understood as :
##' \itemize{
##' \item \code{FALSE} : out of mask
##' \item \code{TRUE} : in mask
##' }
##' \item Values in \code{\link[terra:rast]{SpatRaster}} will be understood as :
##' \itemize{
##' \item \code{NA} : out of mask
##' \item \code{not NA} : in mask
##' }
##' }
##'
##'
##' @keywords sample factor
##'
##' @seealso \code{\link{bm_PseudoAbsences}}, \code{\link{bm_CrossValidation}}
##' @family Secundary functions
##'
##'
##' @examples
##' library(terra)
##'
##' ## Create raster data
##' ras.1 <- ras.2 <- mask.out <- rast(nrows = 10, ncols = 10)
##' ras.1[] <- as.factor(rep(c(1, 2, 3, 4, 5), each = 20))
##' ras.1 <- as.factor(ras.1)
##' ras.2[] <- rnorm(100)
##' stk <- c(ras.1, ras.2)
##' names(stk) <- c("varFact", "varNorm")
##'
##' ## define a mask for already sampled points
##' mask.out[1:40] <- 1
##'
##' ## define a list of masks where we want to sample in priority
##' mask.in <- list(ras.1, ras.1)
##' mask.in[[1]][1:80] <- NA ## only level 5 should be sampled in this mask
##' mask.in[[1]][21:80] <- NA ## only levels 1 and 5 should be sampled in this mask
##'
##' ## Sample all factor levels
##' samp1 <- bm_SampleFactorLevels(expl.var = stk, mask.out = mask.out)
##' samp2 <- bm_SampleFactorLevels(expl.var = stk, mask.in = mask.in)
##' samp3 <- bm_SampleFactorLevels(expl.var = stk, mask.out = mask.out, mask.in = mask.in)
##'
##'
##' @importFrom terra rast cats mask subset is.factor values
##' @export
##'
##'
###################################################################################################
bm_SampleFactorLevels <- function(expl.var, mask.out = NULL, mask.in = NULL)
{
if (inherits(expl.var, 'SpatRaster')) {
fact.level.cells <- bm_SampleFactorLevels.SpatRaster(expl.var, mask.out = mask.out, mask.in = mask.in)
return(fact.level.cells)
} else if (inherits(expl.var, 'data.frame')) {
fact.level.cells <- bm_SampleFactorLevels.data.frame(expl.var, mask.out = mask.out, mask.in = mask.in)
return(fact.level.cells)
} else {
warning(paste0("\nunsupported input data.",
"\nexpl.var should be a Raster* object or a data.frame.",
"\n NULL returned"))
return(NULL)
}
}
bm_SampleFactorLevels.SpatRaster <- function(expl.var, mask.out = NULL, mask.in = NULL)
{
## check if some factorial variables are in the input data
fact.var <- which(is.factor(expl.var))
if(any(fact.var))
{ ## some factorial variables present
fact.level.cells <- as.numeric(unlist(sapply(fact.var, function(f)
{
## initialize the list of cells that are selected
selected.cells <- NULL
## get the factor levels on the full dataset
fact.level.names <- cats(subset(expl.var, f))[[1]][,2]
fact.level <- fact.level.original <- cats(subset(expl.var, f))[[1]][,1]
cat("\n\t> fact.level for", names(expl.var)[f], ":\t", paste(fact.level, fact.level.names, sep = ":", collapse = "\t"))
## mask containing points that have already been sampled ------------------------------------
if (!is.null(mask.out))
{
## check the factor levels that have already been sampled
fact.levels.sampled <-
unique(na.omit(
values((mask(subset(expl.var, f), mask.out, maskvalues = c(1), inverse = TRUE)))
))
## update levels names (lost during mask conversion)
cat("\n\t - according to mask.out levels", fact.levels.sampled, "have already been sampled")
## update the list of factor levels to sample
fact.level <- setdiff(fact.level, fact.levels.sampled)
}
## if there still is some levels to sample --------------------------------------------------
## take a random value of them in the full dataset
if (length(fact.level) > 0) {
cat("\n\t - levels", fact.level, "will be sampled in the original raster")
selected.cells <- c(selected.cells, sapply(fact.level, function(fl) {
ind = which(subset(expl.var, f)[] == fl)
if (length(ind) > 0) {
return(sample(ind, 1))
}
}))
}
return(unlist(selected.cells))
})))
return(unique(fact.level.cells))
} else { ## no factorial variable
return(NULL)
}
}
bm_SampleFactorLevels.data.frame <- function(expl.var, mask.out = NULL, mask.in = NULL)
{
## check if some factorial variables are in the input data
fact.var <- which(sapply(expl.var, is.factor))
if(any(fact.var))
{ ## some factorial variables present
fact.level.cells <- as.numeric(unlist(sapply(fact.var, function(f)
{
## initialize the list of cells that are selected
selected.cells <- NULL
## get the factor levels on the full dataset
fact.level <- fact.level.original <- levels(expl.var[, f])
cat("\n\t> fact.level for", colnames(expl.var)[f], ":\t", paste(1:length(fact.level), fact.level, sep = ":", collapse = "\t"))
## mask containing points that have already been sampled ------------------------------------
if (!is.null(mask.out))
{
## check the factor levels that have already been sampled
fact.levels.sampled <- unique(na.omit(as.character(expl.var[mask.out[, 1], f])))
## remove already sampled points from candidates
expl.var[mask.out[, 1], ] <- NA
cat("\n\t - according to mask.out levels", fact.levels.sampled, "have already been sampled")
## update the list of factor levels to sample
fact.level <- setdiff(fact.level, fact.levels.sampled)
}
## if there still is some levels to sample --------------------------------------------------
if(length(fact.level))
{
## a. try first to sample factors in the given masks -------------------
if (!is.null(mask.in))
{ ## list of mask we want to sample in (order matter!)
for (mask.in.id in 1:ncol(mask.in))
{
if (length(fact.level) > 0) ## if there still is some levels to sample
{
## update the masked version of the factorial raster
x.f.masked <- as.character(expl.var[, f])
x.f.masked[!mask.in[, mask.in.id]] <- NA
x.f.levels <- unique(na.omit(x.f.masked))
## get the list of levels that could be sampled in this mask
fact.levels.in.m.in <- intersect(fact.level, x.f.levels)
if (length(fact.levels.in.m.in) > 0) {
cat("\n\t - levels", fact.levels.in.m.in, "will be sampled in mask.out", mask.in.id)
selected.cells <- c(selected.cells, sapply(fact.levels.in.m.in, function(fl){
candidate.cells <- na.omit(which(x.f.masked[] == fl))
selected.cell <- NULL
if (length(candidate.cells) == 1) { ## single candidate cell
selected.cell <- candidate.cells
} else if (length(candidate.cells) > 1) { ## multi candidate cells
selected.cell <- sample(candidate.cells, 1)
}
return(selected.cell)
}))
## update the list of factor levels to sample
fact.level <- setdiff(fact.level, fact.levels.in.m.in)
}
}
} ## end loop over mask.in
}
## if there still is some levels to sample ------------------------------------------------
## b. take a random value of them in the full dataset
## !! this should be tricky if mask.in arg is given because the value will be picked out of
## mask.in but is necessary to ensure that models will run smoothly
if (length(fact.level) > 0){
cat("\n\t - levels", fact.level, "will be sampled in the original data.frame")
selected.cells <- c(selected.cells, sapply(fact.level, function(fl) {
candidate.cells <- na.omit(which(expl.var[, f] == fl))
selected.cell <- NULL
if (length(candidate.cells) <= 1) { ## single candidate cell
selected.cell <- candidate.cells
} else if (length(candidate.cells) > 1) { ## multi candidate cells
selected.cell <- sample(candidate.cells, 1)
}
return(selected.cell)
}))
}
}
return(unlist(selected.cells))
})))
return(unique(fact.level.cells))
} else { ## no factorial variable
return(NULL)
}
}