forked from neurodata/SPORF
-
Notifications
You must be signed in to change notification settings - Fork 0
/
RunPredictSim.R
111 lines (105 loc) · 4.11 KB
/
RunPredictSim.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
#' Predict class labels on a test set using a single tree.
#'
#' This is the base function called by Predict.
#'
#' @param X an n sample by d feature matrix (preferable) or data frame which was used to train the provided forest.
#' @param Xtrain an n by d numeric matrix (preferable) or data frame. This should be the same data matrix/frame used to train the forest. If provided, then the output will be an ntest-by-ntrain matrix of similarities between training and test points (Xtrain=NULL)
#' @param tree a tree from a forest returned by RerF.
#' @param OOB if TRUE and Xtrain is provided, then the tree predicts only the similarities of the out-of-bag training points to the test points. (OOB=TRUE)
#'
#' @return predictions an n length vector of prediction based on the tree provided to this function
#'
RunPredictSim <-
function(X, Xtrain, tree, OOB) {
tm <- 0L
currentNode <- 0L
curr_ind <- 0L
if (!is.null(Xtrain)) {
ntrain <- nrow(Xtrain)
if (OOB) {
# predict.idx <- which(!(1L:ntrain %in% tree$ind))
# num.oob <-length(predict.idx)
# Xtrain <- Xtrain[predict.idx, , drop = F]
num.oob <- length(tree$ind)
Xtrain <- Xtrain[tree$ind, , drop = F]
}
ntest <- nrow(X)
X <- rbind(Xtrain, X)
}
n <- nrow(X)
# do we need to rotate the data?
if (!is.null(tree$rotmat)) {
if (is.null(tree$rotdims)) {
X[] <- X %*% tree$rotmat
} else {
X[, tree$rotdims] <- X[, tree$rotdims] %*% tree$rotmat
}
}
leafNodeIdx <- integer(n)
Xnode <- double(n)
numNodes <- length(tree$treeMap)
Assigned2Node <- vector("list", numNodes)
Assigned2Node[[1L]] <- 1L:n
for (m in 1:numNodes) {
nodeSize <- length(Assigned2Node[[m]])
if (nodeSize > 0L) {
if ((tm <- tree$treeMap[m]) > 0L) {
indexHigh <- tree$matAindex[tm + 1L]
indexLow <- tree$matAindex[tm] + 1L
s <- (indexHigh - indexLow + 1L) / 2L
Xnode[1:nodeSize] <- X[Assigned2Node[[m]], tree$matAstore[indexLow:indexHigh][(1:s) * 2L - 1L], drop = F] %*%
tree$matAstore[indexLow:indexHigh][(1:s) * 2L]
moveLeft <- Xnode[1L:nodeSize] <= tree$CutPoint[tm]
Assigned2Node[[tm * 2L]] <- Assigned2Node[[m]][moveLeft]
Assigned2Node[[tm * 2L + 1L]] <- Assigned2Node[[m]][!moveLeft]
} else {
leafNodeIdx[Assigned2Node[[m]]] <- tm * -1L
}
}
Assigned2Node[m] <- list(NULL)
}
if (is.null(Xtrain)) {
predictions <- matrix(as.double(NA), nrow = n, ncol = n)
diag(predictions) <- 1
for (j in 1L:(n - 1L)) {
for (i in (j + 1L):n) {
if (leafNodeIdx[i] >= leafNodeIdx[j]) {
predictions[i, j] <- tree$leafSimilarity[leafNodeIdx[i], leafNodeIdx[j]]
} else {
predictions[i, j] <- tree$leafSimilarity[leafNodeIdx[j], leafNodeIdx[i]]
}
}
}
} else {
predictions <- matrix(as.double(NA), nrow = ntest, ncol = ntrain)
if (OOB) {
for (j in 1L:num.oob) {
oob.idx <- tree$ind[j]
# oob.idx <- predict.idx[j]
for (i in 1L:ntest) {
if (leafNodeIdx[num.oob + i] >= leafNodeIdx[j]) {
predictions[i, oob.idx] <- tree$leafSimilarity[leafNodeIdx[num.oob + i], leafNodeIdx[j]]
} else {
predictions[i, oob.idx] <- tree$leafSimilarity[leafNodeIdx[j], leafNodeIdx[num.oob + i]]
}
}
}
} else {
for (j in 1L:ntrain) {
for (i in 1L:ntest) {
if (leafNodeIdx[ntrain + i] >= leafNodeIdx[j]) {
predictions[i, j] <- tree$leafSimilarity[leafNodeIdx[ntrain + i], leafNodeIdx[j]]
} else {
predictions[i, j] <- tree$leafSimilarity[leafNodeIdx[j], leafNodeIdx[ntrain + i]]
}
}
}
}
}
### If square, make output matrix symmetric
### TODO: this could be made into a class similar to base::dist
if (dim(predictions)[1] == dim(predictions)[2]) {
predictions[upper.tri(predictions)] <- t(predictions)[upper.tri(predictions)]
}
return(predictions)
}