/
test_sparse.R
124 lines (93 loc) · 4.29 KB
/
test_sparse.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
library(ranger)
library(survival)
library(Matrix)
library(methods)
context("ranger_sparse")
## Iris sparse data
iris_sparse <- Matrix(data.matrix(iris), sparse = TRUE)
## 0/1 sparse data
n <- 100
p <- 5
x <- replicate(p, rbinom(n, 1, .1))
y <- rbinom(n, 1, .5)
dat <- data.frame(y = y, x)
dat_matrix <- data.matrix(dat)
dat_sparse <- Matrix(dat_matrix, sparse = TRUE)
test_that("Same result with sparse data for iris classification", {
set.seed(56)
rf1 <- ranger(data = iris_sparse, dependent.variable.name = "Species", classification = TRUE, num.trees = 5)
set.seed(56)
rf2 <- ranger(data = iris, dependent.variable.name = "Species", num.trees = 5)
expect_equal(rf1$prediction.error, rf2$prediction.error)
pred1 <- levels(iris$Species)[rf1$predictions[!is.na(rf1$predictions)]]
pred2 <- as.character(rf2$predictions[!is.na(rf2$predictions)])
expect_equal(pred1, pred2)
})
test_that("Same result with sparse data for iris regression", {
set.seed(56)
rf1 <- ranger(data = iris_sparse, dependent.variable.name = "Sepal.Length", classification = FALSE, num.trees = 5)
set.seed(56)
rf2 <- ranger(data = iris, dependent.variable.name = "Sepal.Length", num.trees = 5)
expect_equal(rf1$prediction.error, rf2$prediction.error)
pred1 <- rf1$predictions[!is.na(rf1$predictions)]
pred2 <- rf2$predictions[!is.na(rf2$predictions)]
expect_equal(pred1, pred2)
})
test_that("Same result with sparse data for 0/1 classification", {
set.seed(56)
rf1 <- ranger(data = dat_sparse, dependent.variable.name = "y", classification = TRUE, num.trees = 5)
set.seed(56)
rf2 <- ranger(data = dat, dependent.variable.name = "y", classification = TRUE, num.trees = 5)
expect_equal(rf1$prediction.error, rf2$prediction.error)
pred1 <- as.character(rf1$predictions[!is.na(rf1$predictions)])
pred2 <- as.character(rf2$predictions[!is.na(rf2$predictions)])
expect_equal(pred1, pred2)
})
test_that("Same result with sparse data for 0/1 regression", {
set.seed(56)
rf1 <- ranger(data = dat_sparse, dependent.variable.name = "y", classification = FALSE, num.trees = 5)
set.seed(56)
rf2 <- ranger(data = dat, dependent.variable.name = "y", num.trees = 5)
expect_equal(rf1$prediction.error, rf2$prediction.error)
pred1 <- rf1$predictions[!is.na(rf1$predictions)]
pred2 <- rf2$predictions[!is.na(rf2$predictions)]
expect_equal(pred1, pred2)
})
test_that("Same result with sparse data for 0/1 probability prediction", {
set.seed(56)
rf1 <- ranger(data = dat_sparse, dependent.variable.name = "y", probability = TRUE, num.trees = 5)
set.seed(56)
rf2 <- ranger(data = dat, dependent.variable.name = "y", probability = TRUE, num.trees = 5)
expect_equal(rf1$prediction.error, rf2$prediction.error)
pred1 <- rf1$predictions[!is.na(rf1$predictions)]
pred2 <- rf2$predictions[!is.na(rf2$predictions)]
expect_equal(pred1, pred2)
})
test_that("Prediction is the same if training or testing data is sparse", {
idx <- sample(nrow(iris), 2/3*nrow(iris))
train <- iris[idx, ]
test <- iris[-idx, ]
train_sparse <- Matrix(data.matrix(train), sparse = TRUE)
test_sparse <- Matrix(data.matrix(test), sparse = TRUE)
set.seed(42)
rf1 <- ranger(data = train, dependent.variable.name = "Species", classification = TRUE, num.trees = 5)
pred1 <- predict(rf1, test)
pred1_sparse <- predict(rf1, test_sparse)
set.seed(42)
rf2 <- ranger(data = train_sparse, dependent.variable.name = "Species", classification = TRUE, num.trees = 5)
pred2 <- predict(rf2, test)
pred2_sparse <- predict(rf2, test_sparse)
expect_equal(pred1$predictions, pred1_sparse$predictions)
expect_equal(as.character(pred1$predictions), levels(iris$Species)[pred2$predictions])
expect_equal(pred2$predictions, pred2_sparse$predictions)
})
test_that("Sparse probability prediction works correctly", {
rf <- ranger(data = dat_sparse, dependent.variable.name = "y", classification = TRUE, probability = TRUE, num.trees = 5)
pred <- predict(rf, dat_sparse)
expect_equal(dim(pred$predictions), c(nrow(dat_sparse), 2))
})
test_that("Corrected importance working for sparse data", {
rf <- ranger(data = dat_sparse, dependent.variable.name = "y", classification = TRUE,
num.trees = 5, importance = "impurity_corrected")
expect_equal(names(rf$variable.importance), colnames(dat_sparse)[-1])
})