have the tuning strategies support logging
accosmin committed Jul 7, 2024
1 parent 5cbafd7 commit ccc0d25
Showing 23 changed files with 115 additions and 131 deletions.
3 changes: 1 addition & 2 deletions app/bench_gboost.cpp
@@ -146,8 +146,7 @@ int unsafe_main(int argc, const char* argv[])
auto model = gboost_model_t{};
rconfig.setup(model);

const auto fit_logger = ml::params_t::make_stdio_logger();
const auto fit_params = ml::params_t{}.solver(*rsolver).tuner(*rtuner).logger(fit_logger);
const auto fit_params = ml::params_t{}.solver(*rsolver).tuner(*rtuner).logger(make_stdout_logger());
const auto fit_result = model.fit(dataset, train_samples, *rloss, wlearners, fit_params);

const auto test_errors_values = model.evaluate(dataset, valid_samples, *rloss);
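Taken together with the matching hunk in bench_linear.cpp below, this shows the caller-side effect of the commit: the ml-specific ml::params_t::make_stdio_logger() helper is gone and the fitting parameters now accept the library-wide logger_t, here built with make_stdout_logger(). A minimal sketch of the new wiring, assuming the solver/tuner/loss objects set up earlier in the benchmark:

// before: const auto fit_logger = ml::params_t::make_stdio_logger();
//         const auto fit_params = ml::params_t{}.solver(*rsolver).tuner(*rtuner).logger(fit_logger);
// after: one logger_t shared by the fitting code and, through params_t::logger(), by the tuner
const auto fit_params = ml::params_t{}.solver(*rsolver).tuner(*rtuner).logger(make_stdout_logger());
const auto fit_result = model.fit(dataset, train_samples, *rloss, wlearners, fit_params);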
3 changes: 1 addition & 2 deletions app/bench_linear.cpp
@@ -135,8 +135,7 @@ int unsafe_main(int argc, const char* argv[])
auto model = linear_model_t{};
rconfig.setup(model);

const auto fit_logger = ml::params_t::make_stdio_logger();
const auto fit_params = ml::params_t{}.solver(*rsolver).tuner(*rtuner).logger(fit_logger);
const auto fit_params = ml::params_t{}.solver(*rsolver).tuner(*rtuner).logger(make_stdout_logger());
const auto fit_result = model.fit(dataset, train_samples, *rloss, fit_params);

const auto test_errors_values = model.evaluate(dataset, valid_samples, *rloss);
12 changes: 1 addition & 11 deletions include/nano/mlearn/params.h
@@ -16,16 +16,6 @@ class result_t;
class NANO_PUBLIC params_t
{
public:
///
/// \brief logging operator: op(result - up to the current step, prefix)
///
using logger_t = std::function<void(const result_t&, const string_t&)>;

///
/// \brief returns a default logging implementation that prints the current status to standard I/O.
///
static logger_t make_stdio_logger(int precision = 8);

///
/// \brief default constructor
///
@@ -100,7 +90,7 @@ class NANO_PUBLIC params_t
///
/// \brief log the current fitting result.
///
void log(const result_t&, const string_t& prefix) const;
void log(const result_t&, tensor_size_t last_trial, const string_t& prefix, int precision = 8) const;

private:
// attributes
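params_t::log() now takes the index of the last trial that was already reported, plus an explicit precision (defaulting to 8, like the removed make_stdio_logger()), so it can be called repeatedly while tuning and only print the trials added since the previous call. A sketch of the intended incremental use, assuming a result_t accumulated so far (the actual call sites appear in the tune.h and model.cpp hunks below):

const auto old_trials = result.trials();    // trials reported so far
// ... evaluate a new batch of hyper-parameter trials across folds ...
fit_params.log(result, old_trials, prefix); // prints only the newly added trials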
6 changes: 6 additions & 0 deletions include/nano/mlearn/result.h
@@ -76,6 +76,12 @@ class NANO_PUBLIC result_t
///
scalar_t value(tensor_size_t trial, split_type = split_type::valid, value_type = value_type::errors) const;

///
/// \brief returns the average value of the given trial range across folds.
///
tensor1d_t values(tensor_range_t trial_range, split_type = split_type::valid,
value_type = value_type::errors) const;

///
/// \brief returns the statistics for the optimum hyper-parameters.
///
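The new values() overload batches the per-trial value() accessor over a contiguous range of trials, each entry being the chosen metric averaged across folds; the tuning loop below uses it to hand the freshly evaluated trials back to the tuner. A minimal usage sketch (make_range() comes from include/nano/tensor/range.h, also touched by this commit):

// average validation error of each trial in [old_trials, old_trials + new_trials), averaged across folds
const auto trial_values = result.values(make_range(old_trials, old_trials + new_trials));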
14 changes: 6 additions & 8 deletions include/nano/mlearn/tune.h
@@ -22,6 +22,9 @@ result_t tune(const string_t& prefix, const indices_t& samples, const params_t&
auto tpool = parallel::pool_t{};
auto result = result_t{std::move(param_spaces), folds};

// TODO: detailed logs in the splitter and tuner
// TODO: detailed logs for each trial+fold for ML models and solvers

// tune hyper-parameters (if any) in parallel by hyper-parameter trials and folds
const auto callback = [&](const tensor2d_t& new_params)
{
@@ -48,19 +51,14 @@ result_t tune(const string_t& prefix, const indices_t& samples, const params_t&

tpool.map(folds * new_trials, thread_callback);

fit_params.log(result, prefix);
fit_params.log(result, old_trials, prefix);

tensor1d_t values{new_trials};
for (tensor_size_t trial = 0; trial < new_trials; ++trial)
{
values(trial) = result.value(old_trials + trial);
}
return values;
return result.values(make_range(old_trials, old_trials + new_trials));
};

if (!result.param_spaces().empty())
{
fit_params.tuner().optimize(result.param_spaces(), callback);
fit_params.tuner().optimize(result.param_spaces(), callback, fit_params.logger());
}
else
{
2 changes: 1 addition & 1 deletion include/nano/tensor/range.h
@@ -53,7 +53,7 @@ class tensor_range_t
///
/// \brief creates a range of dimensions.
///
inline auto make_range(tensor_size_t begin, tensor_size_t end)
inline auto make_range(const tensor_size_t begin, const tensor_size_t end)
{
return tensor_range_t{begin, end};
}
5 changes: 3 additions & 2 deletions include/nano/tuner.h
@@ -2,6 +2,7 @@

#include <nano/configurable.h>
#include <nano/factory.h>
#include <nano/logger.h>
#include <nano/tuner/callback.h>
#include <nano/tuner/space.h>
#include <nano/tuner/step.h>
@@ -33,9 +34,9 @@ class NANO_PUBLIC tuner_t : public typed_t, public configurable_t, public clonab
///
/// \brief optimize the given hyper-parameters and returns all the evaluated steps.
///
tuner_steps_t optimize(const param_spaces_t&, const tuner_callback_t&) const;
tuner_steps_t optimize(const param_spaces_t&, const tuner_callback_t&, const logger_t&) const;

private:
virtual void do_optimize(const param_spaces_t&, const tuner_callback_t&, tuner_steps_t& steps) const = 0;
virtual void do_optimize(const param_spaces_t&, const tuner_callback_t&, const logger_t&, tuner_steps_t&) const = 0;
};
} // namespace nano
2 changes: 1 addition & 1 deletion include/nano/tuner/callback.h
@@ -5,6 +5,6 @@

namespace nano
{
///< evaluates the candidate hyper-parameter values
///< evaluates the given set of hyper-parameter values: (trials, hyper-parameter values) => (trials,)
using tuner_callback_t = std::function<tensor1d_t(const tensor2d_t&)>;
} // namespace nano
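For orientation, a hedged sketch of a conforming callback: it receives one row of candidate hyper-parameter values per trial and returns one scalar per trial (the callback in include/nano/mlearn/tune.h returns averaged validation errors, so smaller is better). The tensor accessors used here (size<0>(), element access via operator()) are assumptions about the nano tensor API rather than something shown in this commit:

const tuner_callback_t callback = [](const tensor2d_t& all_params) -> tensor1d_t
{
    // one value per trial (row); a dummy quadratic objective over the first hyper-parameter
    tensor1d_t values(all_params.size<0>());
    for (tensor_size_t trial = 0; trial < values.size(); ++trial)
    {
        const auto x = all_params(trial, 0);
        values(trial) = (x - 1.0) * (x - 1.0);
    }
    return values;
};

// the updated interface then threads a logger through the search, e.g. (sketch):
// const auto steps = fit_params.tuner().optimize(spaces, callback, fit_params.logger());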
2 changes: 1 addition & 1 deletion include/nano/tuner/local.h
@@ -23,6 +23,6 @@ class NANO_PUBLIC local_search_tuner_t final : public tuner_t
///
/// \brief @see tuner_t
///
void do_optimize(const param_spaces_t&, const tuner_callback_t&, tuner_steps_t&) const override;
void do_optimize(const param_spaces_t&, const tuner_callback_t&, const logger_t&, tuner_steps_t&) const override;
};
} // namespace nano
2 changes: 1 addition & 1 deletion include/nano/tuner/surrogate.h
@@ -89,6 +89,6 @@ class NANO_PUBLIC surrogate_tuner_t final : public tuner_t
///
/// \brief @see tuner_t
///
void do_optimize(const param_spaces_t&, const tuner_callback_t&, tuner_steps_t&) const override;
void do_optimize(const param_spaces_t&, const tuner_callback_t&, const logger_t&, tuner_steps_t&) const override;
};
} // namespace nano
4 changes: 3 additions & 1 deletion include/nano/tuner/util.h
@@ -6,6 +6,7 @@

namespace nano
{
class logger_t;
using igrid_t = indices_t;
using igrids_t = std::vector<igrid_t>;

@@ -39,5 +40,6 @@ NANO_PUBLIC igrids_t local_search(const igrid_t& min_igrid, const igrid_t& max_i
/// \brief evaluate the given grid points (if not already) and update the given tuner steps.
/// returns true if at least one new grid point needs to be evaluated.
///
NANO_PUBLIC bool evaluate(const param_spaces_t&, const tuner_callback_t&, igrids_t igrids, tuner_steps_t&);
NANO_PUBLIC bool evaluate(const param_spaces_t&, const tuner_callback_t&, igrids_t igrids, const logger_t&,
tuner_steps_t&);
} // namespace nano
2 changes: 1 addition & 1 deletion src/gboost/model.cpp
@@ -313,7 +313,7 @@ ml::result_t gboost_model_t::fit(const dataset_t& dataset, const indices_t& samp

fit_result.store(::selected(values, samples));
}
fit_params.log(fit_result, "gboost");
fit_params.log(fit_result, fit_result.trials(), "gboost");

return fit_result;
}
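Since the per-trial lines are already emitted incrementally while tuning, passing fit_result.trials() as the last-trial argument here should make log() skip straight to the refit/optimum summary, which is printed only when the refit statistics are finite (see the reworked params_t::log() in src/mlearn/params.cpp below).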
2 changes: 1 addition & 1 deletion src/linear/model.cpp
@@ -184,7 +184,7 @@ ml::result_t linear_model_t::fit(const dataset_t& dataset, const indices_t& samp
m_bias = std::move(result.m_bias);
m_weights = std::move(result.m_weights);
}
fit_params.log(fit_result, "linear");
fit_params.log(fit_result, fit_result.trials(), "linear");

return fit_result;
}
116 changes: 52 additions & 64 deletions src/mlearn/params.cpp
@@ -7,66 +7,6 @@
using namespace nano;
using namespace nano::ml;

params_t::logger_t params_t::make_stdio_logger(const int precision)
{
return [=, last_trial = tensor_size_t{0U}](const result_t& result, const string_t& prefix) mutable
{
const auto& spaces = result.param_spaces();
const auto optim_errors_stats = result.stats(value_type::errors);
const auto optim_losses_stats = result.stats(value_type::losses);

const auto print_params = [&](const tensor1d_cmap_t params, const auto... tokens)
{
assert(spaces.size() == static_cast<size_t>(params.size()));

// FIXME: should use the loggable_t interface for this!
auto logger = make_stdout_logger();
logger.log(log_type::info, std::fixed, std::setprecision(precision), std::fixed, prefix, ": ");
for (size_t i = 0U, size = spaces.size(); i < size; ++i)
{
logger.log(spaces[i].name(), "=", params(static_cast<tensor_size_t>(i)), ",");
}
logger.log(tokens..., ".\n");
};

for (tensor_size_t trial = last_trial; trial < result.trials(); ++trial)
{
const auto folds = result.folds();
const auto norm = static_cast<scalar_t>(folds);

auto sum_train_losses = 0.0;
auto sum_train_errors = 0.0;
auto sum_valid_losses = 0.0;
auto sum_valid_errors = 0.0;
for (tensor_size_t fold = 0; fold < folds; ++fold)
{
const auto fold_train_value = result.stats(trial, fold, split_type::train, value_type::losses).m_mean;
const auto fold_train_error = result.stats(trial, fold, split_type::train, value_type::errors).m_mean;
const auto fold_valid_value = result.stats(trial, fold, split_type::valid, value_type::losses).m_mean;
const auto fold_valid_error = result.stats(trial, fold, split_type::valid, value_type::errors).m_mean;

print_params(result.params(trial), "train=", fold_train_value, "/", fold_train_error, ",",
"valid=", fold_valid_value, "/", fold_valid_error, ",fold=", (fold + 1), "/", folds);

sum_train_losses += fold_train_value;
sum_train_errors += fold_train_error;
sum_valid_losses += fold_valid_value;
sum_valid_errors += fold_valid_error;
}

print_params(result.params(trial), "train=", sum_train_losses / norm, "/", sum_train_errors / norm, ",",
"valid=", sum_valid_losses / norm, "/", sum_valid_errors / norm, "(average)");
}
last_trial = result.trials();

if (std::isfinite(optim_errors_stats.m_mean))
{
const auto trial = result.optimum_trial();
print_params(result.params(trial), "refit=", optim_losses_stats.m_mean, "/", optim_errors_stats.m_mean);
}
};
}

params_t::params_t()
{
tuner("surrogate");
@@ -203,15 +143,63 @@ const splitter_t& params_t::splitter() const
return *m_splitter;
}

const params_t::logger_t& params_t::logger() const
const logger_t& params_t::logger() const
{
return m_logger;
}

void params_t::log(const result_t& result, const string_t& prefix) const
void params_t::log(const result_t& result, const tensor_size_t last_trial, const string_t& prefix,
const int precision) const
{
if (m_logger)
const auto& spaces = result.param_spaces();
const auto optim_errors_stats = result.stats(value_type::errors);
const auto optim_losses_stats = result.stats(value_type::losses);

const auto print_params = [&](const tensor1d_cmap_t params, const auto... tokens)
{
assert(spaces.size() == static_cast<size_t>(params.size()));

// FIXME: should use the loggable_t interface for this!
m_logger.log(log_type::info, std::fixed, std::setprecision(precision), std::fixed, prefix, ": ");
for (size_t i = 0U, size = spaces.size(); i < size; ++i)
{
m_logger.log(spaces[i].name(), "=", params(static_cast<tensor_size_t>(i)), ",");
}
m_logger.log(tokens..., ".\n");
};

for (tensor_size_t trial = last_trial; trial < result.trials(); ++trial)
{
const auto folds = result.folds();
const auto norm = static_cast<scalar_t>(folds);

auto sum_train_losses = 0.0;
auto sum_train_errors = 0.0;
auto sum_valid_losses = 0.0;
auto sum_valid_errors = 0.0;
for (tensor_size_t fold = 0; fold < folds; ++fold)
{
const auto fold_train_value = result.stats(trial, fold, split_type::train, value_type::losses).m_mean;
const auto fold_train_error = result.stats(trial, fold, split_type::train, value_type::errors).m_mean;
const auto fold_valid_value = result.stats(trial, fold, split_type::valid, value_type::losses).m_mean;
const auto fold_valid_error = result.stats(trial, fold, split_type::valid, value_type::errors).m_mean;

print_params(result.params(trial), "train=", fold_train_value, "/", fold_train_error, ",",
"valid=", fold_valid_value, "/", fold_valid_error, ",fold=", (fold + 1), "/", folds);

sum_train_losses += fold_train_value;
sum_train_errors += fold_train_error;
sum_valid_losses += fold_valid_value;
sum_valid_errors += fold_valid_error;
}

print_params(result.params(trial), "train=", sum_train_losses / norm, "/", sum_train_errors / norm, ",",
"valid=", sum_valid_losses / norm, "/", sum_valid_errors / norm, "(average)");
}

if (std::isfinite(optim_errors_stats.m_mean))
{
m_logger(result, prefix);
const auto trial = result.optimum_trial();
print_params(result.params(trial), "refit=", optim_losses_stats.m_mean, "/", optim_errors_stats.m_mean);
}
}
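For orientation, the lines produced by this method are roughly of the following form: one per fold, one averaged line per trial and a final refit line for the optimum. The parameter name and all numbers are hypothetical; only the layout follows print_params() above with the default precision of 8:

gboost: shrinkage=0.10000000,train=0.34512345/0.12345678,valid=0.37812345/0.14567890,fold=1/5.
gboost: shrinkage=0.10000000,train=0.34012345/0.11945678,valid=0.37312345/0.14067890(average).
gboost: shrinkage=0.10000000,refit=0.33512345/0.11545678.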
12 changes: 12 additions & 0 deletions src/mlearn/result.cpp
@@ -108,6 +108,8 @@ tensor1d_cmap_t result_t::params(const tensor_size_t trial) const

scalar_t result_t::value(const tensor_size_t trial, const split_type split, const value_type value) const
{
assert(trial >= 0 && trial < trials());

auto sum_mean = 0.0;
for (tensor_size_t fold = 0, folds = this->folds(); fold < folds; ++fold)
{
@@ -118,6 +120,16 @@ scalar_t result_t::value(const tensor_size_t trial, const split_type split, cons
return sum_mean / static_cast<scalar_t>(folds());
}

tensor1d_t result_t::values(const tensor_range_t trial_range, const split_type split, const value_type value) const
{
tensor1d_t values(trial_range.size());
for (tensor_size_t trial = trial_range.begin(); trial < trial_range.end(); ++trial)
{
values(trial - trial_range.begin()) = this->value(trial, split, value);
}
return values;
}

stats_t result_t::stats(const value_type value) const
{
const auto ivalue = value == value_type::errors ? 0 : 1;
9 changes: 5 additions & 4 deletions src/tuner.cpp
@@ -12,7 +12,8 @@ tuner_t::tuner_t(string_t id)
register_parameter(parameter_t::make_integer("tuner::max_evals", 10, LE, 100, LE, 1000));
}

tuner_steps_t tuner_t::optimize(const param_spaces_t& spaces, const tuner_callback_t& callback) const
tuner_steps_t tuner_t::optimize(const param_spaces_t& spaces, const tuner_callback_t& callback,
const logger_t& logger) const
{
critical(spaces.empty(), "tuner: at least one parameter space is needed!");

@@ -24,17 +25,17 @@ tuner_steps_t tuner_t::optimize(const param_spaces_t& spaces, const tuner_callba
tuner_steps_t steps;

// initialize using a coarse grid
evaluate(spaces, callback, igrids_t{avg_igrid}, steps);
evaluate(spaces, callback, igrids_t{avg_igrid}, logger, steps);
for (tensor_size_t radius = 2; !steps.empty() && steps.size() < max_evals / 2; radius *= 2)
{
const auto igrids = local_search(min_igrid, max_igrid, steps.begin()->m_igrid, radius);
if (!evaluate(spaces, callback, igrids, steps))
if (!evaluate(spaces, callback, igrids, logger, steps))
{
break;
}
}

do_optimize(spaces, callback, steps);
do_optimize(spaces, callback, logger, steps);

return steps;
}
4 changes: 2 additions & 2 deletions src/tuner/local.cpp
@@ -14,7 +14,7 @@ rtuner_t local_search_tuner_t::clone() const
}

void local_search_tuner_t::do_optimize(const param_spaces_t& spaces, const tuner_callback_t& callback,
tuner_steps_t& steps) const
const logger_t& logger, tuner_steps_t& steps) const
{
const auto max_evals = parameter("tuner::max_evals").value<size_t>();
const auto min_igrid = make_min_igrid(spaces);
@@ -24,7 +24,7 @@ void local_search_tuner_t::do_optimize(const param_spaces_t& spaces, const tuner
for (; !steps.empty() && steps.size() < max_evals;)
{
const auto igrids = local_search(min_igrid, max_igrid, steps.begin()->m_igrid, 1);
if (!evaluate(spaces, callback, igrids, steps))
if (!evaluate(spaces, callback, igrids, logger, steps))
{
break;
}