Skip to content

Commit

Permalink
Merge pull request #34 from inbo/aggregate
Browse files Browse the repository at this point in the history
add a method for `aggregate_impute()` for `aggregatedImputed` objects
  • Loading branch information
ThierryO committed Nov 25, 2017
2 parents d32a889 + 776472b commit 8cddb54
Show file tree
Hide file tree
Showing 9 changed files with 134 additions and 15 deletions.
1 change: 1 addition & 0 deletions .Rbuildignore
Expand Up @@ -11,3 +11,4 @@
^_projects$
^_builds$
^_steps$
^NEWS\.md$
8 changes: 4 additions & 4 deletions DESCRIPTION
@@ -1,8 +1,8 @@
Package: multimput
Type: Package
Title: Using Multiple Imputation to Address Missing Data
Version: 0.2.7
Date: 2017-07-13
Version: 0.2.7.9000
Date: 2017-11-18
Authors@R: c(person("Thierry", "Onkelinx", role = c("aut", "cre"), email = "thierry.onkelinx@inbo.be"),
person("Koen", "Devos", role = "aut"),
person("Paul", "Quataert", role = "aut"))
Expand Down Expand Up @@ -35,10 +35,10 @@ Remotes: inbo/INLA
Roxygen: list(wrap = FALSE)
LazyData: TRUE
RoxygenNote: 6.0.1
Collate:
Collate:
'rawImputed_class.R'
'aggregate_impute.R'
'aggregatedImputed_class.R'
'aggregate_impute.R'
'datasets.R'
'generateData.R'
'import_S3_classes.R'
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Expand Up @@ -23,6 +23,7 @@ importFrom(dplyr,do_)
importFrom(dplyr,filter_)
importFrom(dplyr,funs)
importFrom(dplyr,group_by_)
importFrom(dplyr,inner_join)
importFrom(dplyr,mutate_)
importFrom(dplyr,n)
importFrom(dplyr,select_)
Expand Down
3 changes: 3 additions & 0 deletions NEWS.md
@@ -0,0 +1,3 @@
# multimput 0.2.7.9000 (2017-11-18)

- `aggregate_impute()` now also works on `aggregatedImputed` objects (#34)
75 changes: 74 additions & 1 deletion R/aggregate_impute.R
Expand Up @@ -22,7 +22,10 @@ setMethod(
f = "aggregate_impute",
signature = signature(object = "ANY"),
definition = function(object, grouping, fun, filter, join){
stop("aggregate_impute() requires a 'rawImputed' object. See ?impute")
stop(
"aggregate_impute() requires a 'rawImputed' or 'aggregatedImputed' object.
See ?impute or ?aggregate_impute"
)
}
)

Expand Down Expand Up @@ -127,3 +130,73 @@ setMethod(
)
}
)

#' @rdname aggregate_impute
#' @importFrom methods setMethod
#' @importFrom assertthat assert_that
#' @importFrom dplyr %>% group_by_ summarise_at funs vars mutate_ filter_ n semi_join starts_with inner_join
#' @importFrom methods new
#' @importFrom stats setNames
#' @importFrom digest sha1
#' @include aggregatedImputed_class.R
setMethod(
f = "aggregate_impute",
signature = signature(object = "aggregatedImputed"),
definition = function(object, grouping, fun, filter, join){
assert_that(is.character(grouping))
assert_that(inherits(fun, "function"))

id_column <- paste0("ID", sha1(Sys.time()))
data <- object@Covariate %>%
mutate_(.dots = "seq_along(%s)" %>%
sprintf(grouping[1]) %>%
setNames(id_column)
)
imputation <- object@Imputation %>%
as.data.frame() %>%
mutate_(.dots = "seq_along(Imputation0001)" %>%
setNames(id_column)
)

if (!missing(filter)) {
assert_that(is.list(filter))
data <- data %>%
filter_(.dots = filter)
}

if (!missing(join)) {
if (inherits(join, "data.frame")) {
join <- list(join)
}
assert_that(is.list(join))
if (!all(sapply(join, inherits, "data.frame"))) {
stop("not all objects in join are data.frames")
}
for (i in seq_along(join)) {
if (!all(colnames(join[[i]]) %in% colnames(data))) {
stop("all columns in join with be available in the dataset")
}
data <- data %>%
semi_join(join[[i]], by = colnames(join[[i]]))
}
}

total <- data %>%
inner_join(imputation, by = id_column) %>%
group_by_(.dots = grouping) %>%
summarise_at(
.funs = funs(fun),
.vars = vars(colnames(object@Imputation))
)

new(
"aggregatedImputed",
Covariate = total %>%
select_(~-starts_with("Imputation")) %>%
as.data.frame(),
Imputation = total %>%
select_(~starts_with("Imputation")) %>%
as.matrix()
)
}
)
4 changes: 4 additions & 0 deletions man/aggregate_impute.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions tests/testthat/test_ab_impute.R
@@ -1,12 +1,12 @@
describe("impute", {
context("impute")
dataset <- generateData(n.year = 10, n.site = 50, n.run = 1)
dataset$Count[sample(nrow(dataset), 50)] <- NA
dataset <- generateData(n.year = 10, n.site = 10, n.run = 1)
dataset$Count[sample(nrow(dataset), 10)] <- NA
dataset$fYear <- factor(dataset$Year)
dataset$fPeriod <- factor(dataset$Period)
dataset$fSite <- factor(dataset$Site)
dataset$Bottom <- 10000
n.imp <- 50L
n.imp <- 10L
it("handles lm", {
model <- lm(Count ~ Year + factor(Period) + factor(Site), data = dataset)
expect_is(
Expand Down
43 changes: 40 additions & 3 deletions tests/testthat/test_bbb_aggregate_impute.R
@@ -1,7 +1,7 @@
context("aggregate_impute")
describe("aggregate_impute", {
dataset <- generateData(n.year = 10, n.site = 50, n.run = 1)
dataset$Count[sample(nrow(dataset), 50)] <- NA
dataset <- generateData(n.year = 10, n.site = 10, n.run = 1)
dataset$Count[sample(nrow(dataset), 10)] <- NA
dataset$Bottom <- 100000
model <- lm(Count ~ Year + factor(Period) + factor(Site), data = dataset)
imputed <- impute(data = dataset, model = model)
Expand Down Expand Up @@ -104,7 +104,7 @@ describe("aggregate_impute", {
it("checks the sanity of the arguments", {
expect_error(
aggregate_impute(object = "junk"),
"aggregate_impute\\(\\) requires a 'rawImputed' object. See \\?impute"
"aggregate_impute\\(\\) requires a 'rawImputed' or 'aggregatedImputed' object"
)
expect_error(
aggregate_impute(imputed, grouping = "junk", fun = sum),
Expand All @@ -127,4 +127,41 @@ describe("aggregate_impute", {
"filter is not a list"
)
})

it("aggregates an aggregatedImputed", {
aggr <- aggregate_impute(
imputed,
grouping = grouping,
fun = fun
)
expect_is(
aggr2 <- aggregate_impute(aggr, grouping = "Year", fun = max),
"aggregatedImputed"
)
expect_is(
aggr2 <- aggregate_impute(
aggr,
grouping = "Year",
fun = mean,
filter = list("Period <= 3")
),
"aggregatedImputed"
)
})
})

test_that("aggregate_impute() works on aggregatedImputed objects", {
dataset <- generateData(n.year = 10, n.site = 50, n.run = 1)
dataset$Count[sample(nrow(dataset), 50)] <- NA
dataset$Bottom <- 100000
model <- lm(Count ~ Year + factor(Period) + factor(Site), data = dataset)
imputed <- impute(data = dataset, model = model)
grouping <- c("Year", "Period")
fun <- sum
aggr <- aggregate_impute(imputed, grouping = grouping, fun = fun)
grouping2 <- "Year"
expect_is(
aggr2 <- aggregate_impute(aggr, grouping = grouping2, fun = sum),
"aggregatedImputed"
)
})
8 changes: 4 additions & 4 deletions tests/testthat/test_ccc_model_impute.R
@@ -1,6 +1,6 @@
context("model_impute")
describe("model_impute", {
dataset <- generateData(n.year = 10, n.site = 50, n.run = 1)
dataset <- generateData(n.year = 10, n.site = 10, n.run = 1)
it("has no effect when there are no missing values", {
model <- lm(Count ~ Year + factor(Period) + factor(Site), data = dataset)
imputed <- impute(data = dataset, model = model)
Expand All @@ -22,7 +22,7 @@ describe("model_impute", {
)
})

dataset$Count[sample(nrow(dataset), 50)] <- NA
dataset$Count[sample(nrow(dataset), 10)] <- NA
model <- lm(Count ~ Year + factor(Period) + factor(Site), data = dataset)
imputed <- impute(data = dataset, model = model)
aggr <- aggregate_impute(imputed, grouping = c("Year", "Period"), fun = sum)
Expand All @@ -49,8 +49,8 @@ describe("model_impute", {
model_impute(object = "junk"),
"model_impute\\(\\) doesn't handle a 'character' object"
)
dataset <- generateData(n.year = 10, n.site = 50, n.run = 1)
dataset$Count[sample(nrow(dataset), 50)] <- NA
dataset <- generateData(n.year = 10, n.site = 10, n.run = 1)
dataset$Count[sample(nrow(dataset), 10)] <- NA
model <- lm(Count ~ Year + factor(Period) + factor(Site), data = dataset)
imputed <- impute(data = dataset, model = model)
aggr <- aggregate_impute(imputed, grouping = c("Year", "Period"), fun = sum)
Expand Down

0 comments on commit 8cddb54

Please sign in to comment.