Commit 3c0f0ff
Do not block stderr on Nan prediction + Non-logging logger (#2288)
* Do not block stderr on Nan prediction

* cleanup

* forgotten linux quiet changes

* logger initialization

Co-authored-by: Jacob Alber <jalber@fernir.com>
ataymano and lokitoth committed Feb 27, 2020
1 parent 45311c1 commit 3c0f0ff
Showing 36 changed files with 123 additions and 110 deletions.
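The change is mechanical but wide: the `bool quiet` flag moves off `vw` into a new `vw_logger` struct, and `GD::finalize_prediction` gains a `vw_logger&` parameter so its NaN warning can be silenced. As orientation before the per-file hunks, here is a minimal self-contained sketch of the new `finalize_prediction` contract. The `shared_data` stand-in is trimmed to the fields used here, and the `min_label`/`max_label` clamping is assumed from the surrounding context lines; the real definitions live in `global_data.h` and `gd.cc`.

```cpp
#include <cmath>
#include <cstddef>
#include <iostream>

// Trimmed stand-ins for the real types in global_data.h (sketch only).
struct vw_logger { bool quiet = false; };
struct shared_data
{
  size_t example_number = 0;
  float min_label = 0.f;  // assumed field; clamping bounds in the real struct
  float max_label = 1.f;
};

// Sketch of the new contract: NaN is forced to 0, and the warning now
// respects logger.quiet instead of always writing to stderr.
float finalize_prediction(shared_data* sd, vw_logger& logger, float ret)
{
  if (std::isnan(ret))
  {
    ret = 0.f;
    if (!logger.quiet)
      std::cerr << "NAN prediction in example " << sd->example_number + 1
                << ", forcing " << ret << std::endl;
    return ret;
  }
  if (ret > sd->max_label) return sd->max_label;
  if (ret < sd->min_label) return sd->min_label;
  return ret;
}
```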
2 changes: 1 addition & 1 deletion vowpalwabbit/OjaNewton.cc
@@ -392,7 +392,7 @@ void predict(OjaNewton& ON, base_learner&, example& ec)
 ON.data.prediction = 0;
 GD::foreach_feature<update_data, make_pred>(*ON.all, ec, ON.data);
 ec.partial_prediction = (float)ON.data.prediction;
-ec.pred.scalar = GD::finalize_prediction(ON.all->sd, ec.partial_prediction);
+ec.pred.scalar = GD::finalize_prediction(ON.all->sd, ON.all->logger, ec.partial_prediction);
 }

 void update_Z_and_wbar(update_data& data, float x, float& wref)
4 changes: 2 additions & 2 deletions vowpalwabbit/audit_regressor.cc
@@ -161,7 +161,7 @@ inline void print_ex(vw& all, size_t ex_processed, size_t vals_found, size_t pro
 void finish_example(vw& all, audit_regressor_data& dd, example& ec)
 {
 bool printed = false;
-if (ec.example_counter + 1 >= all.sd->dump_interval && !all.quiet)
+if (ec.example_counter + 1 >= all.sd->dump_interval && !all.logger.quiet)
 {
 print_ex(all, ec.example_counter + 1, dd.values_audited, dd.values_audited * 100 / dd.loaded_regressor_values);
 all.sd->weighted_unlabeled_examples = (double)(ec.example_counter + 1); // used in update_dump_interval
@@ -229,7 +229,7 @@ void init_driver(audit_regressor_data& dat)
 if (dat.loaded_regressor_values == 0)
 THROW("regressor has no non-zero weights. Nothing to audit.");

-if (!dat.all->quiet)
+if (!dat.all->logger.quiet)
 {
 dat.all->trace_message << "Regressor contains " << dat.loaded_regressor_values << " values\n";
 dat.all->trace_message << std::left << std::setw(shared_data::col_example_counter) << "example"
28 changes: 14 additions & 14 deletions vowpalwabbit/bfgs.cc
@@ -148,7 +148,7 @@ constexpr bool test_example(example& ec) noexcept { return ec.l.simple.label ==
 float bfgs_predict(vw& all, example& ec)
 {
 ec.partial_prediction = GD::inline_predict(all, ec);
-return GD::finalize_prediction(all.sd, ec.partial_prediction);
+return GD::finalize_prediction(all.sd, all.logger, ec.partial_prediction);
 }

 inline void add_grad(float& d, float f, float& fw) { (&fw)[W_GT] += d * f; }
@@ -252,7 +252,7 @@ void bfgs_iter_start(vw& all, bfgs& b, float* mem, int& lastj, double importance
 ((&(*w))[W_GT]) = 0;
 }
 lastj = 0;
-if (!all.quiet)
+if (!all.logger.quiet)
 fprintf(stderr, "%-10.5f\t%-10.5f\t%-10s\t%-10s\t%-10s\t", g1_g1 / (importance_weight_sum * importance_weight_sum),
 g1_Hg1 / importance_weight_sum, "", "", "");
 }
@@ -300,15 +300,15 @@ void bfgs_iter_middle(vw& all, bfgs& b, float* mem, double* rho, double* alpha,
 (&(*w))[W_DIR] -= ((&(*w))[W_COND]) * ((&(*w))[W_GT]);
 (&(*w))[W_GT] = 0;
 }
-if (!all.quiet)
+if (!all.logger.quiet)
 fprintf(stderr, "%f\t", beta);
 return;

 mem = mem0 + (length - 1) * b.mem_stride;
 }
 else
 {
-if (!all.quiet)
+if (!all.logger.quiet)
 fprintf(stderr, "%-10s\t", "");
 }

@@ -426,7 +426,7 @@ double wolfe_eval(vw& all, bfgs& b, float* mem, double loss_sum, double previous
 double wolfe2 = g1_d / g0_d;
 // double new_step_cross = (loss_sum-previous_loss_sum-g1_d*step)/(g0_d-g1_d);

-if (!all.quiet)
+if (!all.logger.quiet)
 fprintf(stderr, "%-10.5f\t%-10.5f\t%s%-10f\t%-10f\t", g1_g1 / (importance_weight_sum * importance_weight_sum),
 g1_Hg1 / importance_weight_sum, " ", wolfe1, wolfe2);
 return 0.5 * step_size;
@@ -660,7 +660,7 @@ int process_pass(vw& all, bfgs& b)
 }
 if (all.l2_lambda > 0.)
 b.loss_sum += add_regularization(all, b, all.l2_lambda);
-if (!all.quiet)
+if (!all.logger.quiet)
 fprintf(stderr, "%2lu %-10.5f\t", (long unsigned int)b.current_pass + 1, b.loss_sum / b.importance_weight_sum);

 b.previous_loss_sum = b.loss_sum;
@@ -679,7 +679,7 @@ int process_pass(vw& all, bfgs& b)
 ftime(&b.t_end_global);
 b.net_time = (int)(1000.0 * (b.t_end_global.time - b.t_start_global.time) +
 (b.t_end_global.millitm - b.t_start_global.millitm));
-if (!all.quiet)
+if (!all.logger.quiet)
 fprintf(stderr, "%-10s\t%-10.5f\t%-.5f\n", "", d_mag, b.step_size);
 b.predictions.clear();
 update_weight(all, b.step_size);
@@ -699,7 +699,7 @@ int process_pass(vw& all, bfgs& b)
 }
 if (all.l2_lambda > 0.)
 b.loss_sum += add_regularization(all, b, all.l2_lambda);
-if (!all.quiet)
+if (!all.logger.quiet)
 {
 if (!all.holdout_set_off && b.current_pass >= 1)
 {
@@ -739,7 +739,7 @@ int process_pass(vw& all, bfgs& b)
 b.net_time = (int)(1000.0 * (b.t_end_global.time - b.t_start_global.time) +
 (b.t_end_global.millitm - b.t_start_global.millitm));
 float ratio = (b.step_size == 0.f) ? 0.f : (float)new_step / (float)b.step_size;
-if (!all.quiet)
+if (!all.logger.quiet)
 fprintf(stderr, "%-10s\t%-10s\t(revise x %.1f)\t%-.5f\n", "", "", ratio, new_step);
 b.predictions.clear();
 update_weight(all, (float)(-b.step_size + new_step));
@@ -790,7 +790,7 @@ int process_pass(vw& all, bfgs& b)
 ftime(&b.t_end_global);
 b.net_time = (int)(1000.0 * (b.t_end_global.time - b.t_start_global.time) +
 (b.t_end_global.millitm - b.t_start_global.millitm));
-if (!all.quiet)
+if (!all.logger.quiet)
 fprintf(stderr, "%-10s\t%-10.5f\t%-.5f\n", "", d_mag, b.step_size);
 b.predictions.clear();
 update_weight(all, b.step_size);
@@ -834,7 +834,7 @@ int process_pass(vw& all, bfgs& b)
 b.net_time = (int)(1000.0 * (b.t_end_global.time - b.t_start_global.time) +
 (b.t_end_global.millitm - b.t_start_global.millitm));

-if (!all.quiet)
+if (!all.logger.quiet)
 fprintf(stderr, "%-10.5f\t%-10.5f\t%-.5f\n", b.curvature / b.importance_weight_sum, d_mag, b.step_size);
 b.gradient_pass = true;
 } // now start computing derivatives.
@@ -1045,7 +1045,7 @@ void save_load(bfgs& b, io_buf& model_file, bool read, bool text)

 uint32_t stride_shift = all->weights.stride_shift();

-if (!all->quiet)
+if (!all->logger.quiet)
 std::cerr << "m = " << m << std::endl
 << "Allocated "
 << ((long unsigned int)all->length() *
@@ -1056,7 +1056,7 @@ void save_load(bfgs& b, io_buf& model_file, bool read, bool text)
 b.net_time = 0.0;
 ftime(&b.t_start_global);

-if (!all->quiet)
+if (!all->logger.quiet)
 {
 const char* header_fmt = "%2s %-10s\t%-10s\t%-10s\t %-10s\t%-10s\t%-10s\t%-10s\t%-10s\t%-s\n";
 fprintf(stderr, header_fmt, "##", "avg. loss", "der. mag.", "d. m. cond.", "wolfe1", "wolfe2", "mix fraction",
@@ -1133,7 +1133,7 @@ base_learner* bfgs_setup(options_i& options, vw& all)
 if (b->m == 0)
 all.hessian_on = true;

-if (!all.quiet)
+if (!all.logger.quiet)
 {
 if (b->m > 0)
 b->all->trace_message << "enabling BFGS based optimization ";
6 changes: 3 additions & 3 deletions vowpalwabbit/boosting.cc
@@ -379,7 +379,7 @@ void save_load(boosting& o, io_buf& model_file, bool read, bool text)
 bin_text_write_fixed(model_file, (char*)&(o.alpha[i]), sizeof(o.alpha[i]), os2, text);
 }

-if (!o.all->quiet)
+if (!o.all->logger.quiet)
 {
 if (read)
 cerr << "Loading alpha: " << endl;
@@ -416,9 +416,9 @@ LEARNER::base_learner* boosting_setup(options_i& options, vw& all)
 // "adaptive" implements AdaBoost.OL (Algorithm 2 in BLK'15,
 // using sampling rather than importance weighting)

-if (!all.quiet)
+if (!all.logger.quiet)
 cerr << "Number of weak learners = " << data->N << endl;
-if (!all.quiet)
+if (!all.logger.quiet)
 cerr << "Gamma = " << data->gamma << endl;

 data->C = std::vector<std::vector<int64_t> >(data->N, std::vector<int64_t>(data->N, -1));
2 changes: 1 addition & 1 deletion vowpalwabbit/cb.cc
@@ -178,7 +178,7 @@ bool ec_is_example_header(example const& ec) // example headers just have "shar

 void print_update(vw& all, bool is_test, example& ec, multi_ex* ec_seq, bool action_scores)
 {
-if (all.sd->weighted_examples() >= all.sd->dump_interval && !all.quiet && !all.bfgs)
+if (all.sd->weighted_examples() >= all.sd->dump_interval && !all.logger.quiet && !all.bfgs)
 {
 size_t num_features = ec.num_features;

2 changes: 1 addition & 1 deletion vowpalwabbit/cb_dro.cc
@@ -136,7 +136,7 @@ base_learner *cb_dro_setup(options_i &options, vw &all)
 THROW("cb_dro_wmax must exceed 1");
 }

-if (!all.quiet)
+if (!all.logger.quiet)
 {
 std::cerr << "Using DRO for CB learning" << std::endl;
 std::cerr << "cb_dro_alpha = " << alpha << std::endl;
2 changes: 1 addition & 1 deletion vowpalwabbit/cb_explore.cc
@@ -224,7 +224,7 @@ void predict_or_learn_cover(cb_explore& data, single_learner& base, example& ec)

 void print_update_cb_explore(vw& all, bool is_test, example& ec, std::stringstream& pred_string)
 {
-if (all.sd->weighted_examples() >= all.sd->dump_interval && !all.quiet && !all.bfgs)
+if (all.sd->weighted_examples() >= all.sd->dump_interval && !all.logger.quiet && !all.bfgs)
 {
 std::stringstream label_string;
 if (is_test)
2 changes: 1 addition & 1 deletion vowpalwabbit/classweight.cc
@@ -85,7 +85,7 @@ LEARNER::base_learner* classweight_setup(options_i& options, vw& all)

 for (auto& s : classweight_array) cweights->load_string(s);

-if (!all.quiet)
+if (!all.logger.quiet)
 all.trace_message << "parsed " << cweights->weights.size() << " class weights" << std::endl;

 LEARNER::single_learner* base = as_singleline(setup_base(options, all));
2 changes: 1 addition & 1 deletion vowpalwabbit/conditional_contextual_bandit.cc
@@ -496,7 +496,7 @@ void print_decision_scores(int f, decision_scores_t& decision_scores)

 void print_update(vw& all, std::vector<example*>& slots, decision_scores_t& decision_scores, size_t num_features)
 {
-if (all.sd->weighted_examples() >= all.sd->dump_interval && !all.quiet && !all.bfgs)
+if (all.sd->weighted_examples() >= all.sd->dump_interval && !all.logger.quiet && !all.bfgs)
 {
 std::string label_str = "";
 std::string delim = "";
2 changes: 1 addition & 1 deletion vowpalwabbit/cost_sensitive.cc
@@ -192,7 +192,7 @@ label_parser cs_label = {default_label, parse_label, cache_label, read_cached_la

 void print_update(vw& all, bool is_test, example& ec, multi_ex* ec_seq, bool action_scores, uint32_t prediction)
 {
-if (all.sd->weighted_examples() >= all.sd->dump_interval && !all.quiet && !all.bfgs)
+if (all.sd->weighted_examples() >= all.sd->dump_interval && !all.logger.quiet && !all.bfgs)
 {
 size_t num_current_features = ec.num_features;
 // for csoaa_ldf we want features from the whole (multiline example),
2 changes: 1 addition & 1 deletion vowpalwabbit/explore_eval.cc
@@ -39,7 +39,7 @@ struct explore_eval

 void finish(explore_eval& data)
 {
-if (!data.all->quiet)
+if (!data.all->logger.quiet)
 {
 data.all->trace_message << "update count = " << data.update_count << std::endl;
 if (data.violations > 0)
2 changes: 1 addition & 1 deletion vowpalwabbit/expreplay.h
@@ -114,7 +114,7 @@ LEARNER::base_learner* expreplay_setup(VW::config::options_i& options, vw& all)

 er->filled = calloc_or_throw<bool>(er->N);

-if (!all.quiet)
+if (!all.logger.quiet)
 std::cerr << "experience replay level=" << er_level << ", buffer=" << er->N << ", replay count=" << er->replay_count
 << std::endl;

10 changes: 5 additions & 5 deletions vowpalwabbit/ftrl.cc
@@ -77,7 +77,7 @@ template <bool audit>
 void predict(ftrl& b, single_learner&, example& ec)
 {
 ec.partial_prediction = GD::inline_predict(*b.all, ec);
-ec.pred.scalar = GD::finalize_prediction(b.all->sd, ec.partial_prediction);
+ec.pred.scalar = GD::finalize_prediction(b.all->sd, b.all->logger, ec.partial_prediction);
 if (audit)
 GD::print_audit_features(*(b.all), ec);
 }
@@ -102,7 +102,7 @@ void multipredict(
 if (all.sd->contraction != 1.)
 for (size_t c = 0; c < count; c++) pred[c].scalar *= (float)all.sd->contraction;
 if (finalize_predictions)
-for (size_t c = 0; c < count; c++) pred[c].scalar = GD::finalize_prediction(all.sd, pred[c].scalar);
+for (size_t c = 0; c < count; c++) pred[c].scalar = GD::finalize_prediction(all.sd, all.logger, pred[c].scalar);
 if (audit)
 {
 for (size_t c = 0; c < count; c++)
@@ -229,7 +229,7 @@ void update_state_and_predict_cb(ftrl& b, single_learner&, example& ec)

 ec.partial_prediction = b.data.predict / ((float)((b.all->normalized_sum_norm_x + 1e-6) / b.total_weight));

-ec.pred.scalar = GD::finalize_prediction(b.all->sd, ec.partial_prediction);
+ec.pred.scalar = GD::finalize_prediction(b.all->sd, b.all->logger, ec.partial_prediction);
 }

 void update_state_and_predict_pistol(ftrl& b, single_learner&, example& ec)
@@ -238,7 +238,7 @@ void update_state_and_predict_pistol(ftrl& b, single_learner&, example& ec)

 GD::foreach_feature<update_data, inner_update_pistol_state_and_predict>(*b.all, ec, b.data);
 ec.partial_prediction = b.data.predict;
-ec.pred.scalar = GD::finalize_prediction(b.all->sd, ec.partial_prediction);
+ec.pred.scalar = GD::finalize_prediction(b.all->sd, b.all->logger, ec.partial_prediction);
 }

 void update_after_prediction_proximal(ftrl& b, example& ec)
@@ -399,7 +399,7 @@ base_learner* ftrl_setup(options_i& options, vw& all)
 b->data.l1_lambda = b->all->l1_lambda;
 b->data.l2_lambda = b->all->l2_lambda;

-if (!all.quiet)
+if (!all.logger.quiet)
 {
 std::cerr << "Enabling FTRL based optimization" << std::endl;
 std::cerr << "Algorithm used: " << algorithm_name << std::endl;
10 changes: 6 additions & 4 deletions vowpalwabbit/gd.cc
@@ -328,12 +328,14 @@ void print_audit_features(vw& all, example& ec)
 print_features(all, ec);
 }

-float finalize_prediction(shared_data* sd, float ret)
+float finalize_prediction(shared_data* sd, vw_logger& logger, float ret)
 {
 if (std::isnan(ret))
 {
 ret = 0.;
-std::cerr << "NAN prediction in example " << sd->example_number + 1 << ", forcing " << ret << std::endl;
+if (!logger.quiet) {
+std::cerr << "NAN prediction in example " << sd->example_number + 1 << ", forcing " << ret << std::endl;
+}
 return ret;
 }
 if (ret > sd->max_label)
@@ -377,7 +379,7 @@ void predict(gd& g, base_learner&, example& ec)
 ec.partial_prediction = inline_predict(all, ec);

 ec.partial_prediction *= (float)all.sd->contraction;
-ec.pred.scalar = finalize_prediction(all.sd, ec.partial_prediction);
+ec.pred.scalar = finalize_prediction(all.sd, all.logger, ec.partial_prediction);
 if (audit)
 print_audit_features(all, ec);
 }
@@ -416,7 +418,7 @@ void multipredict(
 if (all.sd->contraction != 1.)
 for (size_t c = 0; c < count; c++) pred[c].scalar *= (float)all.sd->contraction;
 if (finalize_predictions)
-for (size_t c = 0; c < count; c++) pred[c].scalar = finalize_prediction(all.sd, pred[c].scalar);
+for (size_t c = 0; c < count; c++) pred[c].scalar = finalize_prediction(all.sd, all.logger, pred[c].scalar);
 if (audit)
 {
 for (size_t c = 0; c < count; c++)
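This gd.cc hunk is the behavioral core of the commit: the NaN warning used to go to stderr unconditionally, which (per the commit title) could stall the process when stderr was not being drained, and could not be suppressed in quiet mode. A small usage sketch against the simplified definitions given near the top of this page; the names here come from that sketch, not from VW's test suite:

```cpp
#include <cmath>

// Assumes the sketch definitions of shared_data, vw_logger and
// finalize_prediction shown earlier on this page.
int main()
{
  shared_data sd;
  vw_logger logger;
  logger.quiet = true;  // e.g. what --quiet now toggles

  // NaN is clamped to 0 and, because the logger is quiet,
  // nothing is written to stderr.
  float p = finalize_prediction(&sd, logger, std::nanf(""));
  return p == 0.f ? 0 : 1;
}
```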
2 changes: 1 addition & 1 deletion vowpalwabbit/gd.h
@@ -16,7 +16,7 @@ LEARNER::base_learner* setup(VW::config::options_i& options, vw& all);

 struct gd;

-float finalize_prediction(shared_data* sd, float ret);
+float finalize_prediction(shared_data* sd, vw_logger& logger, float ret);
 void print_audit_features(vw&, example& ec);
 void save_load_regressor(vw& all, io_buf& model_file, bool read, bool text);
 void save_load_online_state(vw& all, io_buf& model_file, bool read, bool text, double& total_weight,
2 changes: 1 addition & 1 deletion vowpalwabbit/gd_mf.cc
@@ -153,7 +153,7 @@ float mf_predict(gdmf& d, example& ec, T& weights)

 all.set_minmax(all.sd, ld.label);

-ec.pred.scalar = GD::finalize_prediction(all.sd, ec.partial_prediction);
+ec.pred.scalar = GD::finalize_prediction(all.sd, all.logger, ec.partial_prediction);

 if (ld.label != FLT_MAX)
 ec.loss = all.loss->getLoss(all.sd, ec.pred.scalar, ld.label) * ec.weight;
17 changes: 14 additions & 3 deletions vowpalwabbit/global_data.h
@@ -319,7 +319,6 @@ enum AllReduceType

 class AllReduce;

-
 enum class label_type_t
 {
 simple,
@@ -345,6 +344,18 @@ struct rand_state
 void set_random_state(uint64_t initial) noexcept { random_state = initial; }
 };

+struct vw_logger
+{
+  bool quiet;
+
+  vw_logger()
+    : quiet(false) {
+  }
+
+  vw_logger(const vw_logger& other) = delete;
+  vw_logger& operator=(const vw_logger& other) = delete;
+};
+
 struct vw
 {
 private:
@@ -466,8 +477,8 @@ struct vw
 namespace_dictionaries{}; // each namespace has a list of dictionaries attached to it

 void (*delete_prediction)(void*);
+vw_logger logger;
 bool audit; // should I print lots of debugging information?
-bool quiet; // Should I suppress progress-printing of updates?
 bool training; // Should I train if label data is available?
 bool active;
 bool invariant_updates; // Should we use importance aware/safe updates
@@ -538,7 +549,7 @@ struct vw

 vw();
 std::shared_ptr<rand_state> get_random_state() { return _random_state_sp; }

 vw(const vw&) = delete;
 vw& operator=(const vw&) = delete;
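A note on the `vw_logger` design visible in this global_data.h hunk: copy construction and assignment are deleted, mirroring `vw` itself, presumably so each `vw` instance owns exactly one logger and call sites cannot fork the quiet state. The call-site migration repeated across this commit is `all.quiet` → `all.logger.quiet`; a minimal sketch (the helper name is hypothetical, not part of VW):

```cpp
// Assumes the vw_logger definition from the hunk above.
// apply_quiet_option is a hypothetical helper for illustration only.
void apply_quiet_option(vw_logger& logger, bool quiet_flag)
{
  // Replaces the old vw::quiet member; callers now test all.logger.quiet.
  logger.quiet = quiet_flag;
}

// vw_logger copy = logger;  // ill-formed: the copy constructor is deleted
```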
2 changes: 1 addition & 1 deletion vowpalwabbit/interact.cc
@@ -170,7 +170,7 @@ LEARNER::base_learner* interact_setup(options_i& options, vw& all)

 data->n1 = (unsigned char)s[0];
 data->n2 = (unsigned char)s[1];
-if (!all.quiet)
+if (!all.logger.quiet)
 std::cerr << "Interacting namespaces " << data->n1 << " and " << data->n2 << std::endl;
 data->all = &all;

[Diff truncated: the remaining changed files of the 36 were not loaded.]