Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #5 from animusnaturae/merge_imbs
Merge new ranger version
- Loading branch information
Showing
44 changed files
with
2,070 additions
and
232 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,17 +1,19 @@ | ||
Package: ranger | ||
Type: Package | ||
Title: A Fast Implementation of Random Forests | ||
Version: 0.3.9 | ||
Date: 2016-03-22 | ||
Version: 0.4.4 | ||
Date: 2016-05-24 | ||
Author: Marvin N. Wright | ||
Maintainer: Marvin N. Wright <wright@imbs.uni-luebeck.de> | ||
Description: A fast implementation of Random Forests, particularly suited for high dimensional data. Ensembles of | ||
classification, regression, survival and probability prediction trees are supported. Data from | ||
genome-wide association studies can be analyzed efficiently. In addition to data frames, datasets of | ||
class 'gwaa.data' (R package GenABEL) can be directly analyzed. | ||
Description: A fast implementation of Random Forests, particularly suited for high dimensional data. Ensembles | ||
of classification, regression, survival and probability prediction trees are supported. Data from | ||
genome-wide association studies can be analyzed efficiently. In addition to data frames, datasets | ||
of class 'gwaa.data' (R package 'GenABEL') can be directly analyzed. | ||
License: GPL-3 | ||
Imports: Rcpp (>= 0.11.2) | ||
LinkingTo: Rcpp | ||
Depends: R (>= 3.1) | ||
Suggests: survival, testthat | ||
Suggests: survival, testthat, GenABEL | ||
RoxygenNote: 5.0.1 | ||
URL: https://github.com/imbs-hl/ranger | ||
BugReports: https://github.com/imbs-hl/ranger/issues |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,7 @@ | ||
# This file was generated by Rcpp::compileAttributes | ||
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 | ||
|
||
rangerCpp <- function(treetype, dependent_variable_name, input_data, variable_names, mtry, num_trees, verbose, seed, num_threads, write_forest, importance_mode_r, min_node_size, split_select_weights, use_split_select_weights, always_split_variable_names, use_always_split_variable_names, status_variable_name, prediction_mode, loaded_forest, sparse_data, sample_with_replacement, probability, unordered_variable_names, use_unordered_variable_names, save_memory, splitrule_r, case_weights, use_case_weights, predict_all, keep_inbag, sample_fraction) { | ||
.Call('ranger_rangerCpp', PACKAGE = 'ranger', treetype, dependent_variable_name, input_data, variable_names, mtry, num_trees, verbose, seed, num_threads, write_forest, importance_mode_r, min_node_size, split_select_weights, use_split_select_weights, always_split_variable_names, use_always_split_variable_names, status_variable_name, prediction_mode, loaded_forest, sparse_data, sample_with_replacement, probability, unordered_variable_names, use_unordered_variable_names, save_memory, splitrule_r, case_weights, use_case_weights, predict_all, keep_inbag, sample_fraction) | ||
# R-level entry point for the compiled routine registered as 'ranger_rangerCpp'.
# Every argument is handed to .Call() unchanged and in the same order as the
# signature; the value returned by the native routine is returned as-is.
# This file is generated by Rcpp::compileAttributes, so regenerate it instead
# of editing by hand.
rangerCpp <- function(treetype, dependent_variable_name, input_data,
                      variable_names, mtry, num_trees, verbose, seed,
                      num_threads, write_forest, importance_mode_r,
                      min_node_size, split_select_weights,
                      use_split_select_weights, always_split_variable_names,
                      use_always_split_variable_names, status_variable_name,
                      prediction_mode, loaded_forest, sparse_data,
                      sample_with_replacement, probability,
                      unordered_variable_names, use_unordered_variable_names,
                      save_memory, splitrule_r, case_weights,
                      use_case_weights, predict_all, keep_inbag,
                      sample_fraction, alpha, minprop, holdout) {
  .Call(
    'ranger_rangerCpp', PACKAGE = 'ranger',
    treetype, dependent_variable_name, input_data,
    variable_names, mtry, num_trees, verbose, seed,
    num_threads, write_forest, importance_mode_r,
    min_node_size, split_select_weights,
    use_split_select_weights, always_split_variable_names,
    use_always_split_variable_names, status_variable_name,
    prediction_mode, loaded_forest, sparse_data,
    sample_with_replacement, probability,
    unordered_variable_names, use_unordered_variable_names,
    save_memory, splitrule_r, case_weights,
    use_case_weights, predict_all, keep_inbag,
    sample_fraction, alpha, minprop, holdout
  )
}
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
# ------------------------------------------------------------------------------- | ||
# This file is part of Ranger. | ||
# | ||
# Ranger is free software: you can redistribute it and/or modify | ||
# it under the terms of the GNU General Public License as published by | ||
# the Free Software Foundation, either version 3 of the License, or | ||
# (at your option) any later version. | ||
# | ||
# Ranger is distributed in the hope that it will be useful, | ||
# but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
# GNU General Public License for more details. | ||
# | ||
# You should have received a copy of the GNU General Public License | ||
# along with Ranger. If not, see <http://www.gnu.org/licenses/>. | ||
# | ||
# Written by: | ||
# | ||
# Marvin N. Wright | ||
# Institut fuer Medizinische Biometrie und Statistik | ||
# Universitaet zu Luebeck | ||
# Ratzeburger Allee 160 | ||
# 23562 Luebeck | ||
# Germany | ||
# | ||
# http://www.imbs-luebeck.de | ||
# wright@imbs.uni-luebeck.de | ||
# ------------------------------------------------------------------------------- | ||
|
||
##' Grow two random forests on two cross-validation folds.
##' Instead of out-of-bag data, the other fold is used to compute permutation importance.
##' Related to the novel permutation variable importance by Janitza et al. (2015).
##'
##' The observations are split at random into two folds (each observation is
##' assigned to the first fold with probability 0.5). Two forests are grown by
##' calling \code{\link{ranger}} with \code{importance = "permutation"},
##' \code{replace = FALSE}, \code{holdout = TRUE} and the fold indicator (or its
##' complement) as \code{case.weights}. The reported variable importance is the
##' mean of the two forests' importances.
##'
##' @title Hold-out random forests
##' @param formula Object of class \code{formula} or \code{character} describing the model to fit.
##' @param data Training data of class \code{data.frame}, \code{matrix} or \code{gwaa.data} (GenABEL).
##' @param ... Further arguments passed to ranger(). The arguments
##'   \code{importance}, \code{case.weights}, \code{replace} and \code{holdout}
##'   are set internally and must not be supplied.
##' @return Object of class \code{holdoutRF}: a list with the two fitted forests
##'   (\code{rf1}, \code{rf2}), the averaged \code{variable.importance}, and the
##'   \code{treetype} and \code{importance.mode} taken from the first forest.
##' @seealso \code{\link{ranger}}
##' @author Marvin N. Wright
##' @references
##' Janitza, S., Celik, E. & Boulesteix, A.-L., (2015). A computationally fast variable importance test for random forests for high-dimensional data, Technical Report 185, University of Munich, \url{https://epub.ub.uni-muenchen.de/25587}.
##' @export
holdoutRF <- function(formula, data, ...) {
  ## These arguments are fixed below; supplying them through '...' would only
  ## trigger R's generic "matched by multiple actual arguments" error, so
  ## reject them up front with a message that names the offending argument(s).
  reserved <- c("importance", "case.weights", "replace", "holdout")
  clash <- intersect(names(list(...)), reserved)
  if (length(clash) > 0) {
    stop("Argument(s) not supported in holdoutRF: ",
         paste(clash, collapse = ", "), ".", call. = FALSE)
  }

  ## Number of observations; gwaa.data objects keep the phenotypes in a slot
  if (inherits(data, "gwaa.data")) {
    n <- nrow(data@phdata)
  } else {
    n <- nrow(data)
  }
  if (is.null(n) || n < 1) {
    stop("'data' must have at least one row.", call. = FALSE)
  }

  ## Split data: 0/1 fold indicator, one draw per observation
  weights <- rbinom(n, 1, 0.5)

  ## Grow RFs, one per fold; the second forest uses the complementary weights
  res <- list(
    rf1 = ranger(formula = formula, data = data, importance = "permutation",
                 case.weights = weights, replace = FALSE, holdout = TRUE, ...),
    rf2 = ranger(formula = formula, data = data, importance = "permutation",
                 case.weights = 1 - weights, replace = FALSE, holdout = TRUE, ...)
  )

  ## Compute importance as the mean over both forests
  res$variable.importance <-
    (res$rf1$variable.importance + res$rf2$variable.importance) / 2
  res$treetype <- res$rf1$treetype
  res$importance.mode <- res$rf1$importance.mode
  class(res) <- "holdoutRF"

  res
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.