From 4d8d81803223dfc931dca8e37fa57ee467503497 Mon Sep 17 00:00:00 2001 From: Marvin Wright Date: Wed, 9 May 2018 09:37:28 +0200 Subject: [PATCH] avoid forest pointers --- cpp_version/src/main.cpp | 109 +++++++++++++++++++-------------------- src/Data.h | 2 +- src/Forest.cpp | 18 +++---- src/Forest.h | 12 ++--- 4 files changed, 69 insertions(+), 72 deletions(-) diff --git a/cpp_version/src/main.cpp b/cpp_version/src/main.cpp index ef0bbedae..acd0b6aba 100644 --- a/cpp_version/src/main.cpp +++ b/cpp_version/src/main.cpp @@ -1,18 +1,19 @@ /*------------------------------------------------------------------------------- -This file is part of ranger. + This file is part of ranger. -Copyright (c) [2014-2018] [Marvin N. Wright] + Copyright (c) [2014-2018] [Marvin N. Wright] -This software may be modified and distributed under the terms of the MIT license. + This software may be modified and distributed under the terms of the MIT license. -Please note that the C++ core of ranger is distributed under MIT license and the -R package "ranger" under GPL3 license. -#-------------------------------------------------------------------------------*/ + Please note that the C++ core of ranger is distributed under MIT license and the + R package "ranger" under GPL3 license. + #-------------------------------------------------------------------------------*/ #include #include #include #include +#include #include "globals.h" #include "ArgumentHandler.h" @@ -23,72 +24,68 @@ R package "ranger" under GPL3 license. using namespace ranger; +void run_ranger(const ArgumentHandler& arg_handler, std::ostream& verbose_out) { + verbose_out << "Starting Ranger." << std::endl; + + // Create forest object + std::unique_ptr forest {}; + switch (arg_handler.treetype) { + case TREE_CLASSIFICATION: + if (arg_handler.probability) { + forest = std::unique_ptr(new ForestProbability); + } else { + forest = std::unique_ptr(new ForestClassification); + } + break; + case TREE_REGRESSION: + forest = std::unique_ptr(new ForestRegression); + break; + case TREE_SURVIVAL: + forest = std::unique_ptr(new ForestSurvival); + break; + case TREE_PROBABILITY: + forest = std::unique_ptr(new ForestProbability); + break; + } + + // Call Ranger + forest->initCpp(arg_handler.depvarname, arg_handler.memmode, arg_handler.file, arg_handler.mtry, + arg_handler.outprefix, arg_handler.ntree, &verbose_out, arg_handler.seed, arg_handler.nthreads, + arg_handler.predict, arg_handler.impmeasure, arg_handler.targetpartitionsize, arg_handler.splitweights, + arg_handler.alwayssplitvars, arg_handler.statusvarname, arg_handler.replace, arg_handler.catvars, + arg_handler.savemem, arg_handler.splitrule, arg_handler.caseweights, arg_handler.predall, arg_handler.fraction, + arg_handler.alpha, arg_handler.minprop, arg_handler.holdout, arg_handler.predictiontype, + arg_handler.randomsplits); + + forest->run(true); + if (arg_handler.write) { + forest->saveToFile(); + } + forest->writeOutput(); + verbose_out << "Finished Ranger." << std::endl; +} + int main(int argc, char **argv) { - ArgumentHandler arg_handler(argc, argv); - Forest* forest = 0; try { - // Handle command line arguments + ArgumentHandler arg_handler(argc, argv); if (arg_handler.processArguments() != 0) { return 0; } arg_handler.checkArguments(); - // Create forest object - switch (arg_handler.treetype) { - case TREE_CLASSIFICATION: - if (arg_handler.probability) { - forest = new ForestProbability; - } else { - forest = new ForestClassification; - } - break; - case TREE_REGRESSION: - forest = new ForestRegression; - break; - case TREE_SURVIVAL: - forest = new ForestSurvival; - break; - case TREE_PROBABILITY: - forest = new ForestProbability; - break; - } - - // Verbose output to logfile if non-verbose mode - std::ostream* verbose_out; if (arg_handler.verbose) { - verbose_out = &std::cout; + run_ranger(arg_handler, std::cout); } else { - std::ofstream* logfile = new std::ofstream(); - logfile->open(arg_handler.outprefix + ".log"); - if (!logfile->good()) { + std::ofstream logfile { arg_handler.outprefix + ".log" }; + if (!logfile.good()) { throw std::runtime_error("Could not write to logfile."); } - verbose_out = logfile; - } - - // Call Ranger - *verbose_out << "Starting Ranger." << std::endl; - forest->initCpp(arg_handler.depvarname, arg_handler.memmode, arg_handler.file, arg_handler.mtry, - arg_handler.outprefix, arg_handler.ntree, verbose_out, arg_handler.seed, arg_handler.nthreads, - arg_handler.predict, arg_handler.impmeasure, arg_handler.targetpartitionsize, arg_handler.splitweights, - arg_handler.alwayssplitvars, arg_handler.statusvarname, arg_handler.replace, arg_handler.catvars, - arg_handler.savemem, arg_handler.splitrule, arg_handler.caseweights, arg_handler.predall, arg_handler.fraction, - arg_handler.alpha, arg_handler.minprop, arg_handler.holdout, arg_handler.predictiontype, - arg_handler.randomsplits); - - forest->run(true); - if (arg_handler.write) { - forest->saveToFile(); + run_ranger(arg_handler, logfile); } - forest->writeOutput(); - *verbose_out << "Finished Ranger." << std::endl; - - delete forest; } catch (std::exception& e) { std::cerr << "Error: " << e.what() << " Ranger will EXIT now." << std::endl; - delete forest; return -1; } diff --git a/src/Data.h b/src/Data.h index 9668e607b..9fcc22e14 100644 --- a/src/Data.h +++ b/src/Data.h @@ -148,7 +148,7 @@ class Data { return is_ordered_variable; } - void setIsOrderedVariable(std::vector& unordered_variable_names) { + void setIsOrderedVariable(const std::vector& unordered_variable_names) { is_ordered_variable.resize(num_cols, true); for (auto& variable_name : unordered_variable_names) { size_t varID = getVariableID(variable_name); diff --git a/src/Forest.cpp b/src/Forest.cpp index ec02ae3b7..a472cd6d1 100644 --- a/src/Forest.cpp +++ b/src/Forest.cpp @@ -47,11 +47,11 @@ Forest::~Forest() { void Forest::initCpp(std::string dependent_variable_name, MemoryMode memory_mode, std::string input_file, uint mtry, std::string output_prefix, uint num_trees, std::ostream* verbose_out, uint seed, uint num_threads, std::string load_forest_filename, ImportanceMode importance_mode, uint min_node_size, - std::string split_select_weights_file, std::vector& always_split_variable_names, - std::string status_variable_name, bool sample_with_replacement, std::vector& unordered_variable_names, - bool memory_saving_splitting, SplitRule splitrule, std::string case_weights_file, bool predict_all, - double sample_fraction, double alpha, double minprop, bool holdout, PredictionType prediction_type, - uint num_random_splits) { + std::string split_select_weights_file, const std::vector& always_split_variable_names, + std::string status_variable_name, bool sample_with_replacement, + const std::vector& unordered_variable_names, bool memory_saving_splitting, SplitRule splitrule, + std::string case_weights_file, bool predict_all, double sample_fraction, double alpha, double minprop, bool holdout, + PredictionType prediction_type, uint num_random_splits) { this->verbose_out = verbose_out; @@ -143,9 +143,9 @@ void Forest::initCpp(std::string dependent_variable_name, MemoryMode memory_mode void Forest::initR(std::string dependent_variable_name, Data* input_data, uint mtry, uint num_trees, std::ostream* verbose_out, uint seed, uint num_threads, ImportanceMode importance_mode, uint min_node_size, - std::vector>& split_select_weights, std::vector& always_split_variable_names, + std::vector>& split_select_weights, const std::vector& always_split_variable_names, std::string status_variable_name, bool prediction_mode, bool sample_with_replacement, - std::vector& unordered_variable_names, bool memory_saving_splitting, SplitRule splitrule, + const std::vector& unordered_variable_names, bool memory_saving_splitting, SplitRule splitrule, std::vector& case_weights, bool predict_all, bool keep_inbag, std::vector& sample_fraction, double alpha, double minprop, bool holdout, PredictionType prediction_type, uint num_random_splits, bool order_snps) { @@ -183,7 +183,7 @@ void Forest::initR(std::string dependent_variable_name, Data* input_data, uint m void Forest::init(std::string dependent_variable_name, MemoryMode memory_mode, Data* input_data, uint mtry, std::string output_prefix, uint num_trees, uint seed, uint num_threads, ImportanceMode importance_mode, uint min_node_size, std::string status_variable_name, bool prediction_mode, bool sample_with_replacement, - std::vector& unordered_variable_names, bool memory_saving_splitting, SplitRule splitrule, + const std::vector& unordered_variable_names, bool memory_saving_splitting, SplitRule splitrule, bool predict_all, std::vector& sample_fraction, double alpha, double minprop, bool holdout, PredictionType prediction_type, uint num_random_splits, bool order_snps) { @@ -874,7 +874,7 @@ void Forest::setSplitWeightVector(std::vector>& split_select } } -void Forest::setAlwaysSplitVariables(std::vector& always_split_variable_names) { +void Forest::setAlwaysSplitVariables(const std::vector& always_split_variable_names) { deterministic_varIDs.reserve(num_independent_variables); diff --git a/src/Forest.h b/src/Forest.h index 5112bf27a..3abb72499 100644 --- a/src/Forest.h +++ b/src/Forest.h @@ -41,23 +41,23 @@ class Forest { void initCpp(std::string dependent_variable_name, MemoryMode memory_mode, std::string input_file, uint mtry, std::string output_prefix, uint num_trees, std::ostream* verbose_out, uint seed, uint num_threads, std::string load_forest_filename, ImportanceMode importance_mode, uint min_node_size, - std::string split_select_weights_file, std::vector& always_split_variable_names, + std::string split_select_weights_file, const std::vector& always_split_variable_names, std::string status_variable_name, bool sample_with_replacement, - std::vector& unordered_variable_names, bool memory_saving_splitting, SplitRule splitrule, + const std::vector& unordered_variable_names, bool memory_saving_splitting, SplitRule splitrule, std::string case_weights_file, bool predict_all, double sample_fraction, double alpha, double minprop, bool holdout, PredictionType prediction_type, uint num_random_splits); void initR(std::string dependent_variable_name, Data* input_data, uint mtry, uint num_trees, std::ostream* verbose_out, uint seed, uint num_threads, ImportanceMode importance_mode, uint min_node_size, - std::vector>& split_select_weights, std::vector& always_split_variable_names, + std::vector>& split_select_weights, const std::vector& always_split_variable_names, std::string status_variable_name, bool prediction_mode, bool sample_with_replacement, - std::vector& unordered_variable_names, bool memory_saving_splitting, SplitRule splitrule, + const std::vector& unordered_variable_names, bool memory_saving_splitting, SplitRule splitrule, std::vector& case_weights, bool predict_all, bool keep_inbag, std::vector& sample_fraction, double alpha, double minprop, bool holdout, PredictionType prediction_type, uint num_random_splits, bool order_snps); void init(std::string dependent_variable_name, MemoryMode memory_mode, Data* input_data, uint mtry, std::string output_prefix, uint num_trees, uint seed, uint num_threads, ImportanceMode importance_mode, uint min_node_size, std::string status_variable_name, bool prediction_mode, bool sample_with_replacement, - std::vector& unordered_variable_names, bool memory_saving_splitting, SplitRule splitrule, + const std::vector& unordered_variable_names, bool memory_saving_splitting, SplitRule splitrule, bool predict_all, std::vector& sample_fraction, double alpha, double minprop, bool holdout, PredictionType prediction_type, uint num_random_splits, bool order_snps); virtual void initInternal(std::string status_variable_name) = 0; @@ -165,7 +165,7 @@ class Forest { // Set split select weights and variables to be always considered for splitting void setSplitWeightVector(std::vector>& split_select_weights); - void setAlwaysSplitVariables(std::vector& always_split_variable_names); + void setAlwaysSplitVariables(const std::vector& always_split_variable_names); // Show progress every few seconds #ifdef OLD_WIN_R_BUILD