Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

structured tests #68

Merged
merged 12 commits into from May 26, 2016
4 changes: 2 additions & 2 deletions .travis.yml
Expand Up @@ -25,6 +25,6 @@ after_success:
notifications:
email:
recipients:
- work@wrig.de
- bjoernhergen.laabs@student.uni-luebeck.de
on_success: change
on_failure: change
on_failure: change
509 changes: 1 addition & 508 deletions ranger-r-package/ranger/tests/testthat/test_ranger.R

Large diffs are not rendered by default.

32 changes: 32 additions & 0 deletions ranger-r-package/ranger/tests/testthat/test_ranger_char.R
@@ -0,0 +1,32 @@
##This skript provides the tests for character vector in data

library(ranger)
library(survival)
context("ranger")

##Initialize random forests
dat <- iris
dat$Test <- paste0("AA",as.character(1:nrow(dat)))

##Tests
test_that("no warning if character vector in data", {
expect_that(ranger(Species ~ ., data = dat, verbose = FALSE),
not(gives_warning()))
})

test_that("no error if character vector in data, prediction", {
rf <- ranger(Species~., dat, write.forest = TRUE)
expect_that(predict(rf, dat),
not(throws_error()))
})

test_that("no warning if character vector in data, alternative interface", {
expect_that(ranger(dependent.variable.name = "Species", data = dat, verbose = FALSE),
not(gives_warning()))
})

test_that("no error if character vector in data, alternative interface, prediction", {
rf <- ranger(dependent.variable.name = "Species", data = dat, verbose = FALSE, write.forest = TRUE)
expect_that(predict(rf, dat),
not(throws_error()))
})
117 changes: 117 additions & 0 deletions ranger-r-package/ranger/tests/testthat/test_ranger_class.R
@@ -0,0 +1,117 @@
##This skript provides the tests for random forests for classification

library(ranger)
library(survival)
context("ranger")

##Initialize the random forest for classification
dat <- data.matrix(iris)

rg.class <- ranger(Species ~ ., data = iris, verbose = FALSE, write.forest = TRUE)
rg.mat <- ranger(dependent.variable.name = "Species", data = dat, write.forest = TRUE, classification = TRUE)

##Basic tests (for all random forests equal)
test_that("classification result is of class ranger with 14 elements", {
expect_that(rg.class, is_a("ranger"))
expect_that(length(rg.class), equals(14))
})

test_that("results have 500 trees", {
expect_that(rg.class$num.trees, equals(500))
})

test_that("results have right number of independent variables", {
expect_that(rg.class$num.independent.variables, equals(ncol(iris) - 1))
})

test_that("Alternative interface works for classification", {
rf <- ranger(dependent.variable.name = "Species", data = iris)
expect_that(rf$treetype, equals("Classification"))
})

test_that("Matrix interface works for classification", {
expect_that(rg.mat$treetype, equals("Classification"))
expect_that(rg.mat$forest$independent.variable.names, equals(colnames(iris)[1:4]))
})

test_that("Matrix interface prediction works for classification", {
expect_that(predict(rg.mat, dat), not(throws_error()))
})

test_that("save.memory option works for classification", {
rf <- ranger(Species ~ ., data = iris, save.memory = TRUE)
expect_that(rf$treetype, equals("Classification"))
})

test_that("predict.all for classification returns numeric matrix of size trees x n", {
rf <- ranger(Species ~ ., iris, num.trees = 5, write.forest = TRUE)
pred <- predict(rf, iris, predict.all = TRUE)
expect_that(pred$predictions, is_a("matrix"))
expect_that(dim(pred$predictions),
equals(c(nrow(iris), rf$num.trees)))
})

test_that("Majority vote of predict.all for classification is equal to forest prediction", {
rf <- ranger(Species ~ ., iris, num.trees = 5, write.forest = TRUE)
pred_forest <- predict(rf, iris, predict.all = FALSE)
pred_trees <- predict(rf, iris, predict.all = TRUE)
## Majority vote
pred_num <- apply(pred_trees$predictions, 1, function(x) {
which(tabulate(x) == max(tabulate(x)))
})
pred <- factor(pred_num, levels = 1:length(rf$forest$levels),
labels = rf$forest$levels)
expect_that(pred, equals(pred_forest$predictions))
})

test_that("Alternative interface classification prediction works if only independent variable given, one independent variable", {
n <- 50

dt <- data.frame(x = runif(n), y = factor(rbinom(n, 1, 0.5)))
rf <- ranger(dependent.variable.name = "y", data = dt, num.trees = 5, write.forest = TRUE)
expect_that(predict(rf, dt),
not(throws_error()))
expect_that(predict(rf, dt[, 1, drop = FALSE]),
not(throws_error()))

dt2 <- data.frame(y = factor(rbinom(n, 1, 0.5)), x = runif(n))
rf <- ranger(dependent.variable.name = "y", data = dt2, num.trees = 5, write.forest = TRUE)
expect_that(predict(rf, dt2),
not(throws_error()))
expect_that(predict(rf, dt2[, 2, drop = FALSE]),
not(throws_error()))
})

test_that("Alternative interface classification prediction works if only independent variable given, two independent variables", {
n <- 50

dt <- data.frame(x1 = runif(n), x2 = runif(n), y = factor(rbinom(n, 1, 0.5)))
rf <- ranger(dependent.variable.name = "y", data = dt, num.trees = 5, write.forest = TRUE)
expect_that(predict(rf, dt),
not(throws_error()))
expect_that(predict(rf, dt[, 1:2]),
not(throws_error()))

dt2 <- data.frame(y = factor(rbinom(n, 1, 0.5)), x1 = runif(n), x2 = runif(n))
rf <- ranger(dependent.variable.name = "y", data = dt2, num.trees = 5, write.forest = TRUE)
expect_that(predict(rf, dt2),
not(throws_error()))
expect_that(predict(rf, dt2[, 2:3]),
not(throws_error()))
})

##Special tests for random forests for classification
test_that("predict works for single observations, classification", {
pred <- predict(rg.class, head(iris, 1))
expect_that(pred$predictions, equals(iris[1,"Species"]))
})

test_that("confusion matrix is of right dimension", {
expect_that(dim(rg.class$confusion.matrix),
equals(rep(nlevels(iris$Species), 2)))
})

test_that("confusion matrix rows are the true classes", {
expect_that(as.numeric(rowSums(rg.class$confusion.matrix)),
equals(as.numeric(table(iris$Species))))
})
34 changes: 34 additions & 0 deletions ranger-r-package/ranger/tests/testthat/test_ranger_imp.R
@@ -0,0 +1,34 @@
##This skript provides the tests for importance measures

library(ranger)
library(survival)
context("ranger")

##Initialize the random forests

rg.imp <- ranger(Species ~ ., data = iris, verbose = FALSE, write.forest = TRUE,
importance = "impurity")
rg.perm <- ranger(Species ~ ., data = iris, verbose = FALSE, write.forest = TRUE,
importance = "permutation")
rg.scale.perm <- ranger(Species ~ ., data = iris, verbose = FALSE, write.forest = TRUE,
importance = "permutation", scale.permutation.importance = TRUE)

##Tests
test_that("importance measures work", {
expect_that(rg.imp$variable.importance, is_a("numeric"))
expect_that(rg.perm$variable.importance, is_a("numeric"))
expect_that(rg.scale.perm$variable.importance, is_a("numeric"))
})

test_that("gini importance is larger than 1", {
expect_that(rg.imp$variable.importance[1], is_more_than(1))
})

test_that("unscaled importance is smaller than 1", {
expect_that(rg.perm$variable.importance[1], is_less_than(1))
})

test_that("scaled importance is larger than 1", {
expect_that(rg.scale.perm$variable.importance[1], is_more_than(1))
})

103 changes: 103 additions & 0 deletions ranger-r-package/ranger/tests/testthat/test_ranger_inbag.R
@@ -0,0 +1,103 @@
##This skript provides the tests for inbag functions

library(ranger)
library(survival)
context("ranger")

##Tests
test_that("Inbag count matrix if of right size, with replacement", {
rf <- ranger(Species ~ ., iris, num.trees = 5, keep.inbag = TRUE)
expect_that(dim(data.frame(rf$inbag.counts)),
equals(c(nrow(iris), rf$num.trees)))
})

test_that("Inbag count matrix if of right size, without replacement", {
rf <- ranger(Species ~ ., iris, num.trees = 5, replace = FALSE, keep.inbag = TRUE)
expect_that(dim(data.frame(rf$inbag.counts)),
equals(c(nrow(iris), rf$num.trees)))
})

test_that("Inbag count matrix if of right size, with replacement, weighted", {
rf <- ranger(Species ~ ., iris, num.trees = 5, case.weights = runif(nrow(iris)), keep.inbag = TRUE)
expect_that(dim(data.frame(rf$inbag.counts)),
equals(c(nrow(iris), rf$num.trees)))
})

test_that("Inbag count matrix if of right size, without replacement, weighted", {
rf <- ranger(Species ~ ., iris, num.trees = 5, replace = FALSE, case.weights = runif(nrow(iris)), keep.inbag = TRUE)
expect_that(dim(data.frame(rf$inbag.counts)),
equals(c(nrow(iris), rf$num.trees)))
})


test_that("Number of samples is right sample fraction, replace=FALSE, default", {
rf <- ranger(Species ~ ., iris, num.trees = 5, keep.inbag = TRUE, replace = FALSE)
num.inbag <- sapply(rf$inbag.counts, function(x) {
sum(x > 0)
})
sample.fraction <- mean(num.inbag/nrow(iris))

expect_that(sample.fraction, is_more_than(0.6))
expect_that(sample.fraction, is_less_than(0.7))
})

test_that("Number of samples is right sample fraction, replace=FALSE, 0.3", {
rf <- ranger(Species ~ ., iris, num.trees = 5, keep.inbag = TRUE, replace = FALSE, sample.fraction = 0.3)
num.inbag <- sapply(rf$inbag.counts, function(x) {
sum(x > 0)
})
sample.fraction <- mean(num.inbag/nrow(iris))

expect_that(sample.fraction, is_more_than(0.25))
expect_that(sample.fraction, is_less_than(0.35))
})

test_that("Number of samples is right sample fraction, replace=TRUE, default", {
rf <- ranger(Species ~ ., iris, num.trees = 5, keep.inbag = TRUE, replace = TRUE)
num.inbag <- sapply(rf$inbag.counts, function(x) {
sum(x > 0)
})

sample.fraction <- mean(num.inbag/nrow(iris))
expected.sample.fraction <- 1-exp(-1)

expect_that(sample.fraction, is_more_than(expected.sample.fraction-0.05))
expect_that(sample.fraction, is_less_than(expected.sample.fraction+0.05))
})

test_that("Number of samples is right sample fraction, replace=TRUE, 0.5", {
rf <- ranger(Species ~ ., iris, num.trees = 5, keep.inbag = TRUE, replace = TRUE, sample.fraction = 0.5)
num.inbag <- sapply(rf$inbag.counts, function(x) {
sum(x > 0)
})

sample.fraction <- mean(num.inbag/nrow(iris))
expected.sample.fraction <- 1-exp(-0.5)

expect_that(sample.fraction, is_more_than(expected.sample.fraction-0.05))
expect_that(sample.fraction, is_less_than(expected.sample.fraction+0.05))
})

test_that("Number of samples is right sample fraction, replace=FALSE, 0.3, weighted", {
rf <- ranger(Species ~ ., iris, num.trees = 5, keep.inbag = TRUE, replace = FALSE, sample.fraction = 0.3, case.weights = runif(nrow(iris)))
num.inbag <- sapply(rf$inbag.counts, function(x) {
sum(x > 0)
})
sample.fraction <- mean(num.inbag/nrow(iris))

expect_that(sample.fraction, is_more_than(0.25))
expect_that(sample.fraction, is_less_than(0.35))
})

test_that("Number of samples is right sample fraction, replace=TRUE, 0.5, weighted", {
rf <- ranger(Species ~ ., iris, num.trees = 5, keep.inbag = TRUE, replace = TRUE, sample.fraction = 0.5, case.weights = runif(nrow(iris)))
num.inbag <- sapply(rf$inbag.counts, function(x) {
sum(x > 0)
})

sample.fraction <- mean(num.inbag/nrow(iris))
expected.sample.fraction <- 1-exp(-0.5)

expect_that(sample.fraction, is_more_than(expected.sample.fraction-0.05))
expect_that(sample.fraction, is_less_than(expected.sample.fraction+0.05))
})
24 changes: 24 additions & 0 deletions ranger-r-package/ranger/tests/testthat/test_ranger_pred.R
@@ -0,0 +1,24 @@
##This skript provides the tests for predictions

library(ranger)
library(survival)
context("ranger")


test_that("predict returns good prediction", {
rf <- ranger(Species ~ ., iris, write.forest = TRUE)
pred <- predict(rf, iris)
expect_that(mean(iris$Species == predictions(pred)), is_more_than(0.9))
})

test_that("case weights work", {
expect_that(ranger(Species ~ ., iris, num.trees = 5, case.weights = rep(1, nrow(iris))),
not(throws_error()))
## Should only predict setosa now
weights <- c(rep(1, 50), rep(0, 100))
rf <- ranger(Species ~ ., iris, num.trees = 5, case.weights = weights, write.forest = TRUE)
pred <- predict(rf, iris)$predictions
expect_that(all(pred == "setosa"), is_true())
})


36 changes: 36 additions & 0 deletions ranger-r-package/ranger/tests/testthat/test_ranger_prob.R
@@ -0,0 +1,36 @@
##This skript provides the tests for random forests for probability estimation

library(ranger)
library(survival)
context("ranger")

##Initialize random forest
train.idx <- sample(nrow(iris), 2/3 * nrow(iris))
iris.train <- iris[train.idx, ]
iris.test <- iris[-train.idx, ]

rg.prob <- ranger(Species ~ ., data = iris.train, write.forest = TRUE, probability = TRUE)
prob <- predict(rg.prob, iris.test)

##Tests
test_that("probability estimations are a matrix with correct size", {
expect_that(prob$predictions, is_a("matrix"))
expect_that(nrow(prob$predictions), equals(nrow(iris.test)))
expect_that(ncol(prob$predictions), equals(length(rg.prob$forest$levels)))
})

test_that("probability estimations are between 0 and 1 and sum to 1", {
expect_that(all(prob$predictions > -1e-5 & prob$predictions <= 1 + 1e-5), is_true())
expect_that(rowSums(prob$predictions), equals(rep(1, nrow(prob$predictions))))
})

test_that("save.memory option works for probability", {
rf <- ranger(Species ~ ., data = iris, probability = TRUE, save.memory = TRUE)
expect_that(rf$treetype, equals("Probability estimation"))
})

test_that("predict works for single observations, probability prediction", {
rf <- ranger(Species ~ ., iris, write.forest = TRUE, probability = TRUE)
pred <- predict(rf, head(iris, 1))
expect_that(names(which.max(pred$predictions)), equals(as.character(iris[1,"Species"])))
})