/
isplit.R
95 lines (87 loc) · 2.63 KB
/
isplit.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
#
# Copyright (c) 2008-2010 Revolution Analytics
# Updated 2023 by Peter Meilstrup
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
iwhich <- function(nf, ind) {
n <- length(ind)
if (n == 0)
stop('illegal zero length vector')
x <- rep(TRUE, length(nf[[1]]))
for (i in seq_len(n))
x <- x & nf[[i]] == ind[i]
which(x)
}
#' Split Iterator
#'
#' Returns an iterator that divides the data in the vector \code{x} into the
#' groups defined by \code{f}.
#'
#'
#' @param x vector or data frame of values to be split into groups.
#' @param f a factor or list of factors used to categorize \code{x}.
#' @param drop logical indicating if levels that do not occur should be
#' dropped.
#' @param \dots current ignored.
#' @return The split iterator.
#' @seealso \code{\link{split}}
#' @details Originally from the `iterators` package.
#' @examples
#'
#' x <- rnorm(200)
#' f <- factor(sample(1:10, length(x), replace = TRUE))
#'
#' it <- isplit(x, f)
#' expected <- split(x, f)
#'
#' for (i in expected) {
#' actual <- nextOr(it, break)
#' stopifnot(actual$value == i)
#' }
#'
#' @export isplit
isplit <- function(x, f, drop=FALSE, ...) {
UseMethod('isplit')
}
#' @exportS3Method
isplit.default <- function(x, f, drop=FALSE, ...) {
if (!is.list(f)) f <- list(f)
cf <- lapply(f, function(a) if (is.factor(a)) a else as.factor(a))
nf <- lapply(cf, as.integer)
flevels <- lapply(f, function(a) if (is.factor(a)) levels(a) else sort(unique.default(a)))
it <- icountn(unlist(lapply(cf, nlevels)))
nextOr_ <- function(or) {
repeat {
i <- nextOr(it, return(or))
j <- iwhich(nf, i)
if (!drop || length(j) > 0)
break
}
k <- seq_along(i)
names(k) <- names(cf)
key <- lapply(k, function(x) flevels[[x]][i[x]])
list(value=x[j], key=key)
}
iteror_internal(nextOr_)
}
# define the data frame method which uses the default method
#' @exportS3Method isplit data.frame
isplit.data.frame <- function(x, f, drop=FALSE, ...) {
it <- isplit(seq_len(nrow(x)), f, drop=drop, ...)
nextOr_ <- function(or) {
i <- nextOr(it, return(or))
list(value=x[i$value,, drop=FALSE], key=i$key)
}
iteror_internal(nextOr_)
}