imbs-hl · mnwright · May 26, 2016 · Mar 30, 2016 · Mar 31, 2016 · Apr 5, 2016
diff --git a/.travis.yml b/.travis.yml
@@ -25,6 +25,6 @@ after_success:
 notifications:
   email:
     recipients:
-        - work@wrig.de
+        - bjoernhergen.laabs@student.uni-luebeck.de
     on_success: change
-    on_failure: change
+    on_failure: change
diff --git a/ranger-r-package/ranger/tests/testthat/test_ranger.R b/ranger-r-package/ranger/tests/testthat/test_ranger.R
diff --git a/ranger-r-package/ranger/tests/testthat/test_ranger_char.R b/ranger-r-package/ranger/tests/testthat/test_ranger_char.R
@@ -0,0 +1,32 @@
+##This skript provides the tests for character vector in data
+
+library(ranger)
+library(survival)
+context("ranger")
+
+##Initialize random forests
+dat <- iris
+dat$Test <- paste0("AA",as.character(1:nrow(dat)))
+
+##Tests
+test_that("no warning if character vector in data", {
+  expect_that(ranger(Species ~ ., data = dat, verbose = FALSE), 
+              not(gives_warning()))
+})
+
+test_that("no error if character vector in data, prediction", {
+  rf <- ranger(Species~., dat, write.forest = TRUE)
+  expect_that(predict(rf, dat),
+              not(throws_error()))
+})
+
+test_that("no warning if character vector in data, alternative interface", {
+  expect_that(ranger(dependent.variable.name = "Species", data = dat, verbose = FALSE), 
+              not(gives_warning()))
+})
+
+test_that("no error if character vector in data, alternative interface, prediction", {
+  rf <- ranger(dependent.variable.name = "Species", data = dat, verbose = FALSE, write.forest = TRUE)
+  expect_that(predict(rf, dat),
+              not(throws_error()))
+})
diff --git a/ranger-r-package/ranger/tests/testthat/test_ranger_class.R b/ranger-r-package/ranger/tests/testthat/test_ranger_class.R
@@ -0,0 +1,117 @@
+##This skript provides the tests for random forests for classification
+
+library(ranger)
+library(survival)
+context("ranger")
+
+##Initialize the random forest for classification
+dat <- data.matrix(iris)
+
+rg.class <- ranger(Species ~ ., data = iris, verbose = FALSE, write.forest = TRUE)
+rg.mat   <- ranger(dependent.variable.name = "Species", data = dat, write.forest = TRUE, classification = TRUE)
+
+##Basic tests (for all random forests equal)
+test_that("classification result is of class ranger with 14 elements", {
+  expect_that(rg.class, is_a("ranger"))
+  expect_that(length(rg.class), equals(14))
+})
+
+test_that("results have 500 trees", {
+  expect_that(rg.class$num.trees, equals(500))
+})
+
+test_that("results have right number of independent variables", {
+  expect_that(rg.class$num.independent.variables, equals(ncol(iris) - 1))
+})
+
+test_that("Alternative interface works for classification", {
+  rf <- ranger(dependent.variable.name = "Species", data = iris)
+  expect_that(rf$treetype, equals("Classification"))
+})
+
+test_that("Matrix interface works for classification", {
+  expect_that(rg.mat$treetype, equals("Classification"))
+  expect_that(rg.mat$forest$independent.variable.names, equals(colnames(iris)[1:4]))
+})
+
+test_that("Matrix interface prediction works for classification", {
+    expect_that(predict(rg.mat, dat), not(throws_error()))
+})
+
+test_that("save.memory option works for classification", {
+  rf <- ranger(Species ~ ., data = iris, save.memory = TRUE)
+  expect_that(rf$treetype, equals("Classification"))
+})
+
+test_that("predict.all for classification returns numeric matrix of size trees x n", {
+  rf <- ranger(Species ~ ., iris, num.trees = 5, write.forest = TRUE)
+  pred <- predict(rf, iris, predict.all = TRUE)
+  expect_that(pred$predictions, is_a("matrix"))
+  expect_that(dim(pred$predictions), 
+              equals(c(nrow(iris), rf$num.trees)))
+})
+
+test_that("Majority vote of predict.all for classification is equal to forest prediction", {
+  rf <- ranger(Species ~ ., iris, num.trees = 5, write.forest = TRUE)
+  pred_forest <- predict(rf, iris, predict.all = FALSE)
+  pred_trees <- predict(rf, iris, predict.all = TRUE)
+  ## Majority vote
+  pred_num <- apply(pred_trees$predictions, 1, function(x) {
+    which(tabulate(x) == max(tabulate(x)))
+  })
+  pred <- factor(pred_num, levels = 1:length(rf$forest$levels),
+                 labels = rf$forest$levels)
+  expect_that(pred, equals(pred_forest$predictions))
+})
+
+test_that("Alternative interface classification prediction works if only independent variable given, one independent variable", {
+  n <- 50
+
+  dt <- data.frame(x = runif(n), y = factor(rbinom(n, 1, 0.5)))
+  rf <- ranger(dependent.variable.name = "y", data = dt, num.trees = 5, write.forest = TRUE)
+  expect_that(predict(rf, dt), 
+              not(throws_error()))
+  expect_that(predict(rf, dt[, 1, drop = FALSE]), 
+              not(throws_error()))
+
+  dt2 <- data.frame(y = factor(rbinom(n, 1, 0.5)), x = runif(n))
+  rf <- ranger(dependent.variable.name = "y", data = dt2, num.trees = 5, write.forest = TRUE)
+  expect_that(predict(rf, dt2), 
+              not(throws_error()))
+  expect_that(predict(rf, dt2[, 2, drop = FALSE]), 
+              not(throws_error()))
+})
+
+test_that("Alternative interface classification prediction works if only independent variable given, two independent variables", {
+  n <- 50
+
+  dt <- data.frame(x1 = runif(n), x2 = runif(n), y = factor(rbinom(n, 1, 0.5)))
+  rf <- ranger(dependent.variable.name = "y", data = dt, num.trees = 5, write.forest = TRUE)
+  expect_that(predict(rf, dt), 
+              not(throws_error()))
+  expect_that(predict(rf, dt[, 1:2]), 
+              not(throws_error()))
+
+  dt2 <- data.frame(y = factor(rbinom(n, 1, 0.5)), x1 = runif(n), x2 = runif(n))
+  rf <- ranger(dependent.variable.name = "y", data = dt2, num.trees = 5, write.forest = TRUE)
+  expect_that(predict(rf, dt2), 
+              not(throws_error()))
+  expect_that(predict(rf, dt2[, 2:3]), 
+              not(throws_error()))
+})
+
+##Special tests for random forests for classification
+test_that("predict works for single observations, classification", {
+  pred <- predict(rg.class, head(iris, 1))
+  expect_that(pred$predictions, equals(iris[1,"Species"]))
+})
+
+test_that("confusion matrix is of right dimension", {
+  expect_that(dim(rg.class$confusion.matrix), 
+              equals(rep(nlevels(iris$Species), 2)))
+})
+
+test_that("confusion matrix rows are the true classes", {
+  expect_that(as.numeric(rowSums(rg.class$confusion.matrix)), 
+              equals(as.numeric(table(iris$Species))))
+})
diff --git a/ranger-r-package/ranger/tests/testthat/test_ranger_imp.R b/ranger-r-package/ranger/tests/testthat/test_ranger_imp.R
@@ -0,0 +1,34 @@
+##This skript provides the tests for importance measures
+
+library(ranger)
+library(survival)
+context("ranger")
+
+##Initialize the random forests
+
+rg.imp <- ranger(Species ~ ., data = iris, verbose = FALSE, write.forest = TRUE,
+                 importance = "impurity")
+rg.perm <- ranger(Species ~ ., data = iris, verbose = FALSE, write.forest = TRUE,
+                 importance = "permutation")
+rg.scale.perm <- ranger(Species ~ ., data = iris, verbose = FALSE, write.forest = TRUE,
+                 importance = "permutation", scale.permutation.importance = TRUE)
+
+##Tests
+test_that("importance measures work", {
+  expect_that(rg.imp$variable.importance, is_a("numeric"))
+  expect_that(rg.perm$variable.importance, is_a("numeric"))
+  expect_that(rg.scale.perm$variable.importance, is_a("numeric"))
+})
+
+test_that("gini importance is larger than 1", {
+  expect_that(rg.imp$variable.importance[1], is_more_than(1))
+})
+
+test_that("unscaled importance is smaller than 1", {
+  expect_that(rg.perm$variable.importance[1], is_less_than(1))
+})
+
+test_that("scaled importance is larger than 1", {
+  expect_that(rg.scale.perm$variable.importance[1], is_more_than(1))
+})
+
diff --git a/ranger-r-package/ranger/tests/testthat/test_ranger_inbag.R b/ranger-r-package/ranger/tests/testthat/test_ranger_inbag.R
@@ -0,0 +1,103 @@
+##This skript provides the tests for inbag functions
+
+library(ranger)
+library(survival)
+context("ranger")
+
+##Tests
+test_that("Inbag count matrix if of right size, with replacement", {
+  rf <- ranger(Species ~ ., iris, num.trees = 5, keep.inbag = TRUE)
+  expect_that(dim(data.frame(rf$inbag.counts)), 
+              equals(c(nrow(iris), rf$num.trees)))
+})
+
+test_that("Inbag count matrix if of right size, without replacement", {
+  rf <- ranger(Species ~ ., iris, num.trees = 5, replace = FALSE, keep.inbag = TRUE)
+  expect_that(dim(data.frame(rf$inbag.counts)), 
+              equals(c(nrow(iris), rf$num.trees)))
+})
+
+test_that("Inbag count matrix if of right size, with replacement, weighted", {
+  rf <- ranger(Species ~ ., iris, num.trees = 5, case.weights = runif(nrow(iris)), keep.inbag = TRUE)
+  expect_that(dim(data.frame(rf$inbag.counts)), 
+              equals(c(nrow(iris), rf$num.trees)))
+})
+
+test_that("Inbag count matrix if of right size, without replacement, weighted", {
+  rf <- ranger(Species ~ ., iris, num.trees = 5, replace = FALSE, case.weights = runif(nrow(iris)), keep.inbag = TRUE)
+  expect_that(dim(data.frame(rf$inbag.counts)), 
+              equals(c(nrow(iris), rf$num.trees)))
+})
+
+
+test_that("Number of samples is right sample fraction, replace=FALSE, default", {
+  rf <- ranger(Species ~ ., iris, num.trees = 5, keep.inbag = TRUE, replace = FALSE)
+  num.inbag <- sapply(rf$inbag.counts, function(x) {
+    sum(x > 0)
+  })
+  sample.fraction <- mean(num.inbag/nrow(iris))
+
+  expect_that(sample.fraction, is_more_than(0.6))
+  expect_that(sample.fraction, is_less_than(0.7))
+})
+
+test_that("Number of samples is right sample fraction, replace=FALSE, 0.3", {
+  rf <- ranger(Species ~ ., iris, num.trees = 5, keep.inbag = TRUE, replace = FALSE, sample.fraction = 0.3)
+  num.inbag <- sapply(rf$inbag.counts, function(x) {
+    sum(x > 0)
+  })
+  sample.fraction <- mean(num.inbag/nrow(iris))
+
+  expect_that(sample.fraction, is_more_than(0.25))
+  expect_that(sample.fraction, is_less_than(0.35))
+})
+
+test_that("Number of samples is right sample fraction, replace=TRUE, default", {
+  rf <- ranger(Species ~ ., iris, num.trees = 5, keep.inbag = TRUE, replace = TRUE)
+  num.inbag <- sapply(rf$inbag.counts, function(x) {
+    sum(x > 0)
+  })
+
+  sample.fraction <- mean(num.inbag/nrow(iris))
+  expected.sample.fraction <- 1-exp(-1)
+
+  expect_that(sample.fraction, is_more_than(expected.sample.fraction-0.05))
+  expect_that(sample.fraction, is_less_than(expected.sample.fraction+0.05))
+})
+
+test_that("Number of samples is right sample fraction, replace=TRUE, 0.5", {
+  rf <- ranger(Species ~ ., iris, num.trees = 5, keep.inbag = TRUE, replace = TRUE, sample.fraction = 0.5)
+  num.inbag <- sapply(rf$inbag.counts, function(x) {
+    sum(x > 0)
+  })
+
+  sample.fraction <- mean(num.inbag/nrow(iris))
+  expected.sample.fraction <- 1-exp(-0.5)
+
+  expect_that(sample.fraction, is_more_than(expected.sample.fraction-0.05))
+  expect_that(sample.fraction, is_less_than(expected.sample.fraction+0.05))
+})
+
+test_that("Number of samples is right sample fraction, replace=FALSE, 0.3, weighted", {
+  rf <- ranger(Species ~ ., iris, num.trees = 5, keep.inbag = TRUE, replace = FALSE, sample.fraction = 0.3, case.weights = runif(nrow(iris)))
+  num.inbag <- sapply(rf$inbag.counts, function(x) {
+    sum(x > 0)
+  })
+  sample.fraction <- mean(num.inbag/nrow(iris))
+
+  expect_that(sample.fraction, is_more_than(0.25))
+  expect_that(sample.fraction, is_less_than(0.35))
+})
+
+test_that("Number of samples is right sample fraction, replace=TRUE, 0.5, weighted", {
+  rf <- ranger(Species ~ ., iris, num.trees = 5, keep.inbag = TRUE, replace = TRUE, sample.fraction = 0.5, case.weights = runif(nrow(iris)))
+  num.inbag <- sapply(rf$inbag.counts, function(x) {
+    sum(x > 0)
+  })
+
+  sample.fraction <- mean(num.inbag/nrow(iris))
+  expected.sample.fraction <- 1-exp(-0.5)
+
+  expect_that(sample.fraction, is_more_than(expected.sample.fraction-0.05))
+  expect_that(sample.fraction, is_less_than(expected.sample.fraction+0.05))
+})
diff --git a/ranger-r-package/ranger/tests/testthat/test_ranger_pred.R b/ranger-r-package/ranger/tests/testthat/test_ranger_pred.R
@@ -0,0 +1,24 @@
+##This skript provides the tests for predictions
+
+library(ranger)
+library(survival)
+context("ranger")
+
+
+test_that("predict returns good prediction", {
+  rf <- ranger(Species ~ ., iris, write.forest = TRUE)
+  pred <- predict(rf, iris)
+  expect_that(mean(iris$Species == predictions(pred)), is_more_than(0.9))
+})
+
+test_that("case weights work", {
+  expect_that(ranger(Species ~ ., iris, num.trees = 5, case.weights = rep(1, nrow(iris))), 
+              not(throws_error()))
+  ## Should only predict setosa now
+  weights <- c(rep(1, 50), rep(0, 100))
+  rf <- ranger(Species ~ ., iris, num.trees = 5, case.weights = weights, write.forest = TRUE)
+  pred <- predict(rf, iris)$predictions
+  expect_that(all(pred == "setosa"), is_true())
+})
+
+
diff --git a/ranger-r-package/ranger/tests/testthat/test_ranger_prob.R b/ranger-r-package/ranger/tests/testthat/test_ranger_prob.R
@@ -0,0 +1,36 @@
+##This skript provides the tests for random forests for probability estimation
+
+library(ranger)
+library(survival)
+context("ranger")
+
+##Initialize random forest
+train.idx <- sample(nrow(iris), 2/3 * nrow(iris))
+iris.train <- iris[train.idx, ]
+iris.test <- iris[-train.idx, ]
+
+rg.prob <- ranger(Species ~ ., data = iris.train, write.forest = TRUE, probability = TRUE)
+prob <- predict(rg.prob, iris.test)
+
+##Tests
+test_that("probability estimations are a matrix with correct size", {
+  expect_that(prob$predictions, is_a("matrix"))
+  expect_that(nrow(prob$predictions), equals(nrow(iris.test)))
+  expect_that(ncol(prob$predictions), equals(length(rg.prob$forest$levels)))
+})
+
+test_that("probability estimations are between 0 and 1 and sum to 1", {
+  expect_that(all(prob$predictions > -1e-5 & prob$predictions <= 1 + 1e-5), is_true())
+  expect_that(rowSums(prob$predictions), equals(rep(1, nrow(prob$predictions))))
+})
+
+test_that("save.memory option works for probability", {
+  rf <- ranger(Species ~ ., data = iris, probability = TRUE, save.memory = TRUE)
+  expect_that(rf$treetype, equals("Probability estimation"))
+})
+
+test_that("predict works for single observations, probability prediction", {
+  rf <- ranger(Species ~ ., iris, write.forest = TRUE, probability = TRUE)
+  pred <- predict(rf, head(iris, 1))
+  expect_that(names(which.max(pred$predictions)), equals(as.character(iris[1,"Species"])))
+})