diff --git a/cs/cli/vowpalwabbit.cpp b/cs/cli/vowpalwabbit.cpp index d40cb1ad78b..38e3f96f385 100644 --- a/cs/cli/vowpalwabbit.cpp +++ b/cs/cli/vowpalwabbit.cpp @@ -62,7 +62,7 @@ void VowpalWabbit::Driver() void VowpalWabbit::RunMultiPass() { if (m_vw->numpasses > 1) { try - { adjust_used_index(*m_vw); + { m_vw->do_reset_source = true; VW::start_parser(*m_vw); LEARNER::generic_driver(*m_vw); @@ -307,7 +307,7 @@ List^ VowpalWabbit::ParseDecisionServiceJson(cli::arrayexamples->Add(ex); - v_array examples = v_init(); + v_array examples; example* native_example = ex->m_example; examples.push_back(native_example); @@ -326,9 +326,6 @@ List^ VowpalWabbit::ParseDecisionServiceJson(cli::arrayEventId = gcnew String(interaction.eventId.c_str()); header->Actions = gcnew cli::array((int)interaction.actions.size()); int index = 0; @@ -789,7 +786,7 @@ VowpalWabbitExample^ VowpalWabbit::GetOrCreateNativeExample() if (ex == nullptr) { try { auto ex = VW::alloc_examples(0, 1); - m_vw->p->lp.default_label(&ex->l); + m_vw->p->lp.default_label(ex->l); return gcnew VowpalWabbitExample(this, ex); } CATCHRETHROW @@ -797,7 +794,7 @@ VowpalWabbitExample^ VowpalWabbit::GetOrCreateNativeExample() try { VW::empty_example(*m_vw, *ex->m_example); - m_vw->p->lp.default_label(&ex->m_example->l); + m_vw->p->lp.default_label(ex->m_example->l); return ex; } diff --git a/cs/cli/vw_example.cpp b/cs/cli/vw_example.cpp index 8a73c46f74e..0dc7b24b091 100644 --- a/cs/cli/vw_example.cpp +++ b/cs/cli/vw_example.cpp @@ -97,7 +97,7 @@ void VowpalWabbitExample::Label::set(ILabel^ label) label->UpdateExample(m_owner->Native->m_vw, m_example); // we need to update the example weight as setup_example() can be called prior to this call. - m_example->weight = m_owner->Native->m_vw->p->lp.get_weight(&m_example->l); + m_example->weight = m_owner->Native->m_vw->p->lp.get_weight(m_example->l); } void VowpalWabbitExample::MakeEmpty(VowpalWabbit^ vw) @@ -280,8 +280,8 @@ System::String^ VowpalWabbitExample::Diff(VowpalWabbit^ vw, VowpalWabbitExample^ } String^ VowpalWabbitSimpleLabelComparator::Diff(VowpalWabbitExample^ ex1, VowpalWabbitExample^ ex2) -{ auto s1 = ex1->m_example->l.simple; - auto s2 = ex2->m_example->l.simple; +{ auto& s1 = ex1->m_example->l.simple(); + auto& s2 = ex2->m_example->l.simple(); if (!(FloatEqual(s1.initial, s2.initial) && FloatEqual(s1.label, s2.label) && @@ -296,8 +296,8 @@ String^ VowpalWabbitSimpleLabelComparator::Diff(VowpalWabbitExample^ ex1, Vowpal } String^ VowpalWabbitContextualBanditLabelComparator::Diff(VowpalWabbitExample^ ex1, VowpalWabbitExample^ ex2) -{ auto s1 = ex1->m_example->l.cb; - auto s2 = ex2->m_example->l.cb; +{ auto& s1 = ex1->m_example->l.cb(); + auto& s2 = ex2->m_example->l.cb(); if (s1.costs.size() != s2.costs.size()) { return System::String::Format("Cost size differ: {0} vs {1}", s1.costs.size(), s2.costs.size()); diff --git a/cs/cli/vw_prediction.cpp b/cs/cli/vw_prediction.cpp index bfce5b3a80f..b93e67a5cd3 100644 --- a/cs/cli/vw_prediction.cpp +++ b/cs/cli/vw_prediction.cpp @@ -10,7 +10,8 @@ namespace VW { void CheckExample(vw* vw, example* ex, prediction_type_t type) -{ if (vw == nullptr) +{ + if (vw == nullptr) throw gcnew ArgumentNullException("vw"); if (ex == nullptr) @@ -18,7 +19,8 @@ void CheckExample(vw* vw, example* ex, prediction_type_t type) auto ex_pred_type = vw->l->pred_type; if (ex_pred_type != type) - { auto sb = gcnew StringBuilder(); + { + auto sb = gcnew StringBuilder(); sb->Append("Prediction type must be "); sb->Append(gcnew String(to_string(type))); sb->Append(" but is 
"); @@ -29,20 +31,23 @@ void CheckExample(vw* vw, example* ex, prediction_type_t type) } float VowpalWabbitScalarPredictionFactory::Create(vw* vw, example* ex) -{ CheckExample(vw, ex, PredictionType); +{ + CheckExample(vw, ex, PredictionType); try - { return VW::get_prediction(ex); + { + return VW::get_prediction(ex); } CATCHRETHROW } - VowpalWabbitScalar VowpalWabbitScalarConfidencePredictionFactory::Create(vw* vw, example* ex) -{ CheckExample(vw, ex, PredictionType); +{ + CheckExample(vw, ex, PredictionType); try - { VowpalWabbitScalar ret; + { + VowpalWabbitScalar ret; ret.Value = VW::get_prediction(ex); ret.Confidence = ex->confidence; @@ -52,15 +57,16 @@ VowpalWabbitScalar VowpalWabbitScalarConfidencePredictionFactory::Create(vw* vw, CATCHRETHROW } -cli::array^ VowpalWabbitScalarsPredictionFactory::Create(vw* vw, example* ex) -{ CheckExample(vw, ex, PredictionType); +cli::array ^ VowpalWabbitScalarsPredictionFactory::Create(vw* vw, example* ex) +{ + CheckExample(vw, ex, PredictionType); try - { auto& scalars = ex->pred.scalars; + { + auto& scalars = ex->pred.scalars(); auto values = gcnew cli::array((int)scalars.size()); int index = 0; - for (float s : scalars) - values[index++] = s; + for (float s : scalars) values[index++] = s; return values; } @@ -68,21 +74,24 @@ cli::array^ VowpalWabbitScalarsPredictionFactory::Create(vw* vw, example* } float VowpalWabbitProbabilityPredictionFactory::Create(vw* vw, example* ex) -{ CheckExample(vw, ex, PredictionType); +{ + CheckExample(vw, ex, PredictionType); - return ex->pred.prob; + return ex->pred.prob(); } float VowpalWabbitCostSensitivePredictionFactory::Create(vw* vw, example* ex) -{ CheckExample(vw, ex, PredictionType); +{ + CheckExample(vw, ex, PredictionType); try - { return VW::get_cost_sensitive_prediction(ex); + { + return VW::get_cost_sensitive_prediction(ex); } CATCHRETHROW } -Dictionary^ VowpalWabbitMulticlassProbabilitiesPredictionFactory::Create(vw* vw, example* ex) +Dictionary ^ VowpalWabbitMulticlassProbabilitiesPredictionFactory::Create(vw* vw, example* ex) { #if _DEBUG if (ex == nullptr) @@ -91,33 +100,38 @@ Dictionary^ VowpalWabbitMulticlassProbabilitiesPredictionFactory::Cr v_array confidence_scores; try - { confidence_scores = VW::get_cost_sensitive_prediction_confidence_scores(ex); + { + confidence_scores = VW::get_cost_sensitive_prediction_confidence_scores(ex); } CATCHRETHROW auto values = gcnew Dictionary(); int i = 0; for (auto& val : confidence_scores) - { values->Add(++i, val); + { + values->Add(++i, val); } return values; } uint32_t VowpalWabbitMulticlassPredictionFactory::Create(vw* vw, example* ex) -{ CheckExample(vw, ex, PredictionType); +{ + CheckExample(vw, ex, PredictionType); - return ex->pred.multiclass; + return ex->pred.multiclass(); } -cli::array^ VowpalWabbitMultilabelPredictionFactory::Create(vw* vw, example* ex) -{ CheckExample(vw, ex, prediction_type_t::multilabels); +cli::array ^ VowpalWabbitMultilabelPredictionFactory::Create(vw* vw, example* ex) +{ + CheckExample(vw, ex, prediction_type_t::multilabels); size_t length; uint32_t* labels; try - { labels = VW::get_multilabel_predictions(ex, length); + { + labels = VW::get_multilabel_predictions(ex, length); } CATCHRETHROW @@ -132,15 +146,25 @@ cli::array^ VowpalWabbitMultilabelPredictionFactory::Create(vw* vw, example return values; } -cli::array^ VowpalWabbitActionScoreBasePredictionFactory::Create(vw* vw, example* ex) -{ CheckExample(vw, ex, PredictionType); +cli::array ^ VowpalWabbitActionScoreBasePredictionFactory::Create(vw* vw, example* ex) +{ + 
CheckExample(vw, ex, PredictionType); - auto& a_s = ex->pred.a_s; - auto values = gcnew cli::array((int)a_s.size()); + ACTION_SCORE::action_scores* a_s = nullptr; + if (ex->pred.get_type() == prediction_type_t::action_scores) + { + a_s = &ex->pred.action_scores(); + } + else + { + a_s = &ex->pred.action_probs(); + } + auto values = gcnew cli::array((int)a_s->size()); auto index = 0; - for (auto& as : a_s) - { values[index].Action = as.action; + for (auto& as : *a_s) + { + values[index].Action = as.action; values[index].Score = as.score; index++; } @@ -148,22 +172,25 @@ cli::array^ VowpalWabbitActionScoreBasePredictionFactory::Create(vw return values; } -cli::array^ VowpalWabbitTopicPredictionFactory::Create(vw* vw, example* ex) -{ if (ex == nullptr) +cli::array ^ VowpalWabbitTopicPredictionFactory::Create(vw* vw, example* ex) +{ + if (ex == nullptr) throw gcnew ArgumentNullException("ex"); auto values = gcnew cli::array(vw->lda); - Marshal::Copy(IntPtr(ex->pred.scalars.begin()), values, 0, vw->lda); + Marshal::Copy(IntPtr(ex->pred.scalars().begin()), values, 0, vw->lda); return values; } -System::Object^ VowpalWabbitDynamicPredictionFactory::Create(vw* vw, example* ex) -{ if (ex == nullptr) +System::Object ^ VowpalWabbitDynamicPredictionFactory::Create(vw* vw, example* ex) +{ + if (ex == nullptr) throw gcnew ArgumentNullException("ex"); switch (vw->l->pred_type) - { case prediction_type_t::scalar: + { + case prediction_type_t::scalar: return VowpalWabbitPredictionType::Scalar->Create(vw, ex); case prediction_type_t::scalars: return VowpalWabbitPredictionType::Scalars->Create(vw, ex); @@ -180,11 +207,12 @@ System::Object^ VowpalWabbitDynamicPredictionFactory::Create(vw* vw, example* ex case prediction_type_t::multiclassprobs: return VowpalWabbitPredictionType::MultiClassProbabilities->Create(vw, ex); default: - { auto sb = gcnew StringBuilder(); + { + auto sb = gcnew StringBuilder(); sb->Append("Unsupported prediction type: "); sb->Append(gcnew String(to_string(vw->l->pred_type))); throw gcnew ArgumentException(sb->ToString()); } } } -} +} // namespace VW diff --git a/java/src/main/c++/jni_base_learner.h b/java/src/main/c++/jni_base_learner.h index 6b1c0556372..0f6e55ddbad 100644 --- a/java/src/main/c++/jni_base_learner.h +++ b/java/src/main/c++/jni_base_learner.h @@ -74,9 +74,10 @@ T base_predict(JNIEnv* env, jobjectArray example_strings, jboolean learn, jlong rethrow_cpp_exception_as_java_exception(env); } + T result = predictor(first_example, env); vwInstance->finish_example(ex_coll); - return predictor(first_example, env); + return result; } #endif // VW_BASE_LEARNER_H diff --git a/java/src/main/c++/jni_spark_vw.cc b/java/src/main/c++/jni_spark_vw.cc index 224298dcc26..49ddb788e5b 100644 --- a/java/src/main/c++/jni_spark_vw.cc +++ b/java/src/main/c++/jni_spark_vw.cc @@ -256,7 +256,7 @@ JNIEXPORT jlong JNICALL Java_org_vowpalwabbit_spark_VowpalWabbitExample_initiali try { - example* ex = VW::alloc_examples(0, 1); + example* ex = VW::alloc_examples(1); ex->interactions = &all->interactions; if (isEmpty) @@ -265,7 +265,7 @@ JNIEXPORT jlong JNICALL Java_org_vowpalwabbit_spark_VowpalWabbitExample_initiali VW::read_line(*all, ex, &empty); } else - all->p->lp.default_label(&ex->l); + all->p->lp.default_label(ex->l); return (jlong) new VowpalWabbitExampleWrapper(all, ex); } @@ -297,7 +297,7 @@ JNIEXPORT void JNICALL Java_org_vowpalwabbit_spark_VowpalWabbitExample_clear(JNI try { VW::empty_example(*all, *ex); - all->p->lp.default_label(&ex->l); + all->p->lp.default_label(ex->l); } catch (...) 
{ @@ -444,7 +444,7 @@ JNIEXPORT jobject JNICALL Java_org_vowpalwabbit_spark_VowpalWabbitExample_getPre ctr = env->GetMethodID(predClass, "", "(F)V"); CHECK_JNI_EXCEPTION(nullptr); - return env->NewObject(predClass, ctr, ex->pred.prob); + return env->NewObject(predClass, ctr, ex->pred.prob()); case prediction_type_t::multiclass: predClass = env->FindClass("java/lang/Integer"); @@ -453,7 +453,7 @@ JNIEXPORT jobject JNICALL Java_org_vowpalwabbit_spark_VowpalWabbitExample_getPre ctr = env->GetMethodID(predClass, "", "(I)V"); CHECK_JNI_EXCEPTION(nullptr); - return env->NewObject(predClass, ctr, ex->pred.multiclass); + return env->NewObject(predClass, ctr, ex->pred.multiclass()); case prediction_type_t::scalars: return scalars_predictor(ex, env); diff --git a/java/src/main/c++/vowpalWabbit_learner_VWActionProbsLearner.cc b/java/src/main/c++/vowpalWabbit_learner_VWActionProbsLearner.cc index 8434219515f..ee90a2ee592 100644 --- a/java/src/main/c++/vowpalWabbit_learner_VWActionProbsLearner.cc +++ b/java/src/main/c++/vowpalWabbit_learner_VWActionProbsLearner.cc @@ -7,17 +7,17 @@ jobject action_probs_prediction(example *vec, JNIEnv *env) jclass action_prob_class = env->FindClass("vowpalWabbit/responses/ActionProb"); jmethodID action_prob_constructor = env->GetMethodID(action_prob_class, "", "(IF)V"); - // The action_probs prediction_type_t is just a placeholder identifying when the aciton_scores + // The action_probs prediction_type_t is just a placeholder identifying when the action_scores // should be treated as probabilities or scores. That is why this function references a_s yet returns // ActionProbs to the Java side. - ACTION_SCORE::action_scores a_s = vec->pred.a_s; + const auto& a_s = vec->pred.action_probs(); size_t num_values = a_s.size(); jobjectArray j_action_probs = env->NewObjectArray(num_values, action_prob_class, 0); jclass action_probs_class = env->FindClass("vowpalWabbit/responses/ActionProbs"); for (uint32_t i = 0; i < num_values; ++i) { - ACTION_SCORE::action_score a = a_s[i]; + const auto& a = a_s[i]; jobject j_action_prob = env->NewObject(action_prob_class, action_prob_constructor, a.action, a.score); env->SetObjectArrayElement(j_action_probs, i, j_action_prob); } diff --git a/java/src/main/c++/vowpalWabbit_learner_VWActionScoresLearner.cc b/java/src/main/c++/vowpalWabbit_learner_VWActionScoresLearner.cc index a5591383bdf..1259ce87d05 100644 --- a/java/src/main/c++/vowpalWabbit_learner_VWActionScoresLearner.cc +++ b/java/src/main/c++/vowpalWabbit_learner_VWActionScoresLearner.cc @@ -7,14 +7,14 @@ jobject action_scores_prediction(example *vec, JNIEnv *env) jclass action_score_class = env->FindClass("vowpalWabbit/responses/ActionScore"); jmethodID action_score_constructor = env->GetMethodID(action_score_class, "", "(IF)V"); - ACTION_SCORE::action_scores a_s = vec->pred.a_s; + const auto a_s = vec->pred.action_scores(); size_t num_values = a_s.size(); jobjectArray j_action_scores = env->NewObjectArray(num_values, action_score_class, 0); jclass action_scores_class = env->FindClass("vowpalWabbit/responses/ActionScores"); for (uint32_t i = 0; i < num_values; ++i) { - ACTION_SCORE::action_score a = a_s[i]; + const auto a = a_s[i]; jobject j_action_score = env->NewObject(action_score_class, action_score_constructor, a.action, a.score); env->SetObjectArrayElement(j_action_scores, i, j_action_score); } diff --git a/java/src/main/c++/vowpalWabbit_learner_VWMulticlassLearner.cc b/java/src/main/c++/vowpalWabbit_learner_VWMulticlassLearner.cc index 4541cd1a099..cb51bb267d2 100644 --- 
a/java/src/main/c++/vowpalWabbit_learner_VWMulticlassLearner.cc +++ b/java/src/main/c++/vowpalWabbit_learner_VWMulticlassLearner.cc @@ -2,7 +2,7 @@ #include "vw.h" #include "jni_base_learner.h" -jint multiclass_predictor(example *vec, JNIEnv *env) { return vec->pred.multiclass; } +jint multiclass_predictor(example *vec, JNIEnv *env) { return vec->pred.multiclass(); } JNIEXPORT jint JNICALL Java_vowpalWabbit_learner_VWMulticlassLearner_predict( JNIEnv *env, jobject obj, jstring example_string, jboolean learn, jlong vwPtr) diff --git a/java/src/main/c++/vowpalWabbit_learner_VWMultilabelsLearner.cc b/java/src/main/c++/vowpalWabbit_learner_VWMultilabelsLearner.cc index e73fd4327f2..7f266ac218c 100644 --- a/java/src/main/c++/vowpalWabbit_learner_VWMultilabelsLearner.cc +++ b/java/src/main/c++/vowpalWabbit_learner_VWMultilabelsLearner.cc @@ -4,7 +4,7 @@ jobject multilabel_predictor(example *vec, JNIEnv *env) { - auto& labels = vec->pred.multilabels.label_v; + auto& labels = vec->pred.multilabels().label_v; size_t num_values = labels.size(); jintArray j_labels = env->NewIntArray(num_values); env->SetIntArrayRegion(j_labels, 0, num_values, (int *)labels.begin()); diff --git a/java/src/main/c++/vowpalWabbit_learner_VWProbLearner.cc b/java/src/main/c++/vowpalWabbit_learner_VWProbLearner.cc index 4f6fa359e0a..cf8194eea4f 100644 --- a/java/src/main/c++/vowpalWabbit_learner_VWProbLearner.cc +++ b/java/src/main/c++/vowpalWabbit_learner_VWProbLearner.cc @@ -2,7 +2,7 @@ #include "vw.h" #include "jni_base_learner.h" -jfloat prob_predictor(example *vec, JNIEnv *env) { return vec->pred.prob; } +jfloat prob_predictor(example *vec, JNIEnv *env) { return vec->pred.prob(); } JNIEXPORT jfloat JNICALL Java_vowpalWabbit_learner_VWProbLearner_predict( JNIEnv *env, jobject obj, jstring example_string, jboolean learn, jlong vwPtr) diff --git a/java/src/main/c++/vowpalWabbit_learner_VWScalarLearner.cc b/java/src/main/c++/vowpalWabbit_learner_VWScalarLearner.cc index def69372304..acb908a8366 100644 --- a/java/src/main/c++/vowpalWabbit_learner_VWScalarLearner.cc +++ b/java/src/main/c++/vowpalWabbit_learner_VWScalarLearner.cc @@ -2,7 +2,7 @@ #include "vw.h" #include "jni_base_learner.h" -jfloat scalar_predictor(example *vec, JNIEnv *env) { return vec->pred.scalar; } +jfloat scalar_predictor(example *vec, JNIEnv *env) { return vec->pred.scalar(); } JNIEXPORT jfloat JNICALL Java_vowpalWabbit_learner_VWScalarLearner_predict( JNIEnv *env, jobject obj, jstring example_string, jboolean learn, jlong vwPtr) diff --git a/java/src/main/c++/vowpalWabbit_learner_VWScalarsLearner.cc b/java/src/main/c++/vowpalWabbit_learner_VWScalarsLearner.cc index 44386aefd79..5f4d5f20ac4 100644 --- a/java/src/main/c++/vowpalWabbit_learner_VWScalarsLearner.cc +++ b/java/src/main/c++/vowpalWabbit_learner_VWScalarsLearner.cc @@ -4,7 +4,7 @@ jfloatArray scalars_predictor(example *vec, JNIEnv *env) { - auto& scalars = vec->pred.scalars; + auto& scalars = vec->pred.scalars(); size_t num_values = scalars.size(); jfloatArray r = env->NewFloatArray(num_values); env->SetFloatArrayRegion(r, 0, num_values, (float *)scalars.begin()); diff --git a/java/src/test/java/vowpalWabbit/learner/VWActionScoresLearnerTest.java b/java/src/test/java/vowpalWabbit/learner/VWActionScoresLearnerTest.java index 8f1e27de934..c4ce9082b8c 100644 --- a/java/src/test/java/vowpalWabbit/learner/VWActionScoresLearnerTest.java +++ b/java/src/test/java/vowpalWabbit/learner/VWActionScoresLearnerTest.java @@ -5,6 +5,7 @@ import org.junit.rules.TemporaryFolder; import 
vowpalWabbit.VWTestHelper; import vowpalWabbit.responses.ActionScores; +import vowpalWabbit.responses.ActionProbs; import java.io.IOException; @@ -85,40 +86,40 @@ private void testCBADF(boolean withRank) throws IOException { String cli = "--quiet --cb_adf -f " + model; if (withRank) cli += " --rank_all"; - VWActionScoresLearner vw = VWLearners.create(cli); - ActionScores[] trainPreds = new ActionScores[cbADFTrain.length]; + VWActionProbsLearner vw = VWLearners.create(cli); + ActionProbs[] trainPreds = new ActionProbs[cbADFTrain.length]; for (int i=0; ilearn(*vec2); - std::cerr << "p2 = " << vec2->pred.scalar << std::endl; + std::cerr << "p2 = " << vec2->pred.scalar() << std::endl; VW::finish_example(*model, *vec2); VW::primitive_feature_space features[2]; @@ -37,7 +37,7 @@ int main(int argc, char *argv[]) example* vec3 = VW::import_example(*model, "", features, 2); model->learn(*vec3); - std::cerr << "p3 = " << vec3->pred.scalar << std::endl; + std::cerr << "p3 = " << vec3->pred.scalar() << std::endl; // TODO: this does not invoke m_vw->l->finish_example() VW::finish_example(*model, *vec3); @@ -46,7 +46,7 @@ int main(int argc, char *argv[]) vw* model2 = VW::initialize("--hash all -q st --noconstant -i train2.vw --no_stdin"); vec2 = VW::read_example(*model2, (char*)" |s p^the_man w^the w^man |t p^un_homme w^un w^homme"); model2->learn(*vec2); - std::cerr << "p4 = " << vec2->pred.scalar << std::endl; + std::cerr << "p4 = " << vec2->pred.scalar() << std::endl; size_t len=0; VW::primitive_feature_space* pfs = VW::export_example(*model2, vec2, len); diff --git a/library/libsearch.h b/library/libsearch.h index 7b1b276c04b..182bd322505 100644 --- a/library/libsearch.h +++ b/library/libsearch.h @@ -6,21 +6,21 @@ license as described in the file LICENSE. #ifndef LIBSEARCH_HOOKTASK_H #define LIBSEARCH_HOOKTASK_H -#include "../vowpalwabbit/parser.h" -#include "../vowpalwabbit/parse_example.h" -#include "../vowpalwabbit/vw.h" -#include "../vowpalwabbit/search.h" -#include "../vowpalwabbit/search_hooktask.h" +#include "parser.h" +#include "parse_example.h" +#include "vw.h" +#include "search.h" +#include "search_hooktask.h" template class SearchTask { public: SearchTask(vw& vw_obj) : vw_obj(vw_obj), sch(*(Search::search*)vw_obj.searchstr) - { bogus_example = VW::alloc_examples(vw_obj.p->lp.label_size, 1); - VW::read_line(vw_obj, bogus_example, (char*)"1 | x"); - VW::setup_example(vw_obj, bogus_example); + { + VW::read_line(vw_obj, &bogus_example, (char*)"1 | x"); + VW::setup_example(vw_obj, &bogus_example); - trigger.push_back(bogus_example); + trigger.push_back(&bogus_example); HookTask::task_data* d = sch.get_task_data(); d->run_f = _search_run_fn; @@ -31,23 +31,23 @@ template class SearchTask d->extra_data2 = NULL; } virtual ~SearchTask() - { trigger.clear(); // the individual examples get cleaned up below - VW::dealloc_example(vw_obj.p->lp.delete_label, *bogus_example); free(bogus_example); + { + trigger.clear(); } virtual void _run(Search::search&sch, INPUT& input_example, OUTPUT& output) {} // YOU MUST DEFINE THIS FUNCTION! 
void _setup(Search::search&sch, INPUT& input_example, OUTPUT& output) {} // OPTIONAL void _takedown(Search::search&sch, INPUT& input_example, OUTPUT& output) {} // OPTIONAL - void learn(INPUT& input_example, OUTPUT& output) { bogus_example->test_only = false; call_vw(input_example, output); } - void predict(INPUT& input_example, OUTPUT& output) { bogus_example->test_only = true; call_vw(input_example, output); } + void learn(INPUT& input_example, OUTPUT& output) { bogus_example.test_only = false; call_vw(input_example, output); } + void predict(INPUT& input_example, OUTPUT& output) { bogus_example.test_only = true; call_vw(input_example, output); } protected: vw& vw_obj; Search::search& sch; private: - example* bogus_example; + example bogus_example; multi_ex trigger; void call_vw(INPUT& input_example, OUTPUT& output) diff --git a/library/recommend.cc b/library/recommend.cc index a3ab42b47ca..d5bdd15518f 100644 --- a/library/recommend.cc +++ b/library/recommend.cc @@ -230,12 +230,12 @@ int main(int argc, char* argv[]) if (pr_queue.size() < (size_t)topk) { - pr_queue.push(std::make_pair(ex->pred.scalar, str)); + pr_queue.push(std::make_pair(ex->pred.scalar(), str)); } - else if (pr_queue.top().first < ex->pred.scalar) + else if (pr_queue.top().first < ex->pred.scalar()) { pr_queue.pop(); - pr_queue.push(std::make_pair(ex->pred.scalar, str)); + pr_queue.push(std::make_pair(ex->pred.scalar(), str)); } VW::finish_example(*model, *ex); diff --git a/library/search_generate.cc b/library/search_generate.cc index 91e7d93fe81..25ccdea51a8 100644 --- a/library/search_generate.cc +++ b/library/search_generate.cc @@ -242,7 +242,7 @@ class Generator : public SearchTask Trie* cdict = dict; - v_array ref = v_init(); + v_array ref; int N = in.in.length(); out = "^"; std::vector next; diff --git a/python/pylibvw.cc b/python/pylibvw.cc index b24bdca8c37..5e923b5ff4e 100644 --- a/python/pylibvw.cc +++ b/python/pylibvw.cc @@ -145,27 +145,19 @@ size_t my_get_prediction_type(vw_ptr all) case prediction_type_t::multilabels: return pMULTILABELS; case prediction_type_t::prob: return pPROB; case prediction_type_t::multiclassprobs: return pMULTICLASSPROBS; - case prediction_type_t::decision_probs: return pDECISION_SCORES; + case prediction_type_t::decision_scores: return pDECISION_SCORES; default: THROW("unsupported prediction type used"); } } -void my_delete_example(void*voidec) -{ example* ec = (example*) voidec; - size_t labelType = ec->example_counter; - label_parser* lp = get_label_parser(NULL, labelType); - VW::dealloc_example(lp ? lp->delete_label : NULL, *ec); - free(ec); -} - example* my_empty_example0(vw_ptr vw, size_t labelType) { label_parser* lp = get_label_parser(&*vw, labelType); - example* ec = VW::alloc_examples(lp->label_size, 1); - lp->default_label(&ec->l); + example* ec = VW::alloc_examples(1); + lp->default_label(ec->l); ec->interactions = &vw->interactions; if (labelType == lCOST_SENSITIVE) { COST_SENSITIVE::wclass zero = { 0., 1, 0., 0. 
}; - ec->l.cs.costs.push_back(zero); + ec->l.cs().costs.push_back(zero); } ec->example_counter = labelType; return ec; @@ -173,7 +165,7 @@ example* my_empty_example0(vw_ptr vw, size_t labelType) example_ptr my_empty_example(vw_ptr vw, size_t labelType) { example* ec = my_empty_example0(vw, labelType); - return boost::shared_ptr(ec, my_delete_example); + return boost::shared_ptr(ec); } example_ptr my_read_example(vw_ptr all, size_t labelType, char* str) @@ -181,7 +173,7 @@ example_ptr my_read_example(vw_ptr all, size_t labelType, char* str) VW::read_line(*all, ec, str); VW::setup_example(*all, ec); ec->example_counter = labelType; - return boost::shared_ptr(ec, my_delete_example); + return boost::shared_ptr(ec); } example_ptr my_existing_example(vw_ptr all, size_t labelType, example_ptr existing_example) @@ -241,7 +233,7 @@ void predict_or_learn(vw_ptr& all, py::list& ec) py::list my_parse(vw_ptr& all, char* str) { - v_array examples = v_init(); + v_array examples; examples.push_back(&VW::get_unused_example(all.get())); all->p->text_reader(all.get(), str, strlen(str), examples); @@ -254,8 +246,6 @@ py::list my_parse(vw_ptr& all, char* str) example_collection.append( boost::shared_ptr(ex, dont_delete_me)); } - examples.clear(); - examples.delete_v(); return example_collection; } @@ -414,7 +404,7 @@ void my_setup_example(vw_ptr vw, example_ptr ec) } void unsetup_example(vw_ptr vwP, example_ptr ae) -{ vw&all = *vwP; +{ vw& all = *vwP; ae->partial_prediction = 0.; ae->num_features = 0; ae->total_sum_feat_sq = 0; @@ -469,19 +459,19 @@ void ex_set_label_string(example_ptr ec, vw_ptr vw, std::string label, size_t la vw->p->lp = old_lp; } -float ex_get_simplelabel_label(example_ptr ec) { return ec->l.simple.label; } -float ex_get_simplelabel_weight(example_ptr ec) { return ec->l.simple.weight; } -float ex_get_simplelabel_initial(example_ptr ec) { return ec->l.simple.initial; } -float ex_get_simplelabel_prediction(example_ptr ec) { return ec->pred.scalar; } -float ex_get_prob(example_ptr ec) { return ec->pred.prob; } +float ex_get_simplelabel_label(example_ptr ec) { return ec->l.simple().label; } +float ex_get_simplelabel_weight(example_ptr ec) { return ec->l.simple().weight; } +float ex_get_simplelabel_initial(example_ptr ec) { return ec->l.simple().initial; } +float ex_get_simplelabel_prediction(example_ptr ec) { return ec->pred.scalar(); } +float ex_get_prob(example_ptr ec) { return ec->pred.prob(); } -uint32_t ex_get_multiclass_label(example_ptr ec) { return ec->l.multi.label; } -float ex_get_multiclass_weight(example_ptr ec) { return ec->l.multi.weight; } -uint32_t ex_get_multiclass_prediction(example_ptr ec) { return ec->pred.multiclass; } +uint32_t ex_get_multiclass_label(example_ptr ec) { return ec->l.multi().label; } +float ex_get_multiclass_weight(example_ptr ec) { return ec->l.multi().weight; } +uint32_t ex_get_multiclass_prediction(example_ptr ec) { return ec->pred.multiclass(); } py::list ex_get_scalars(example_ptr ec) { py::list values; - const auto& scalars = ec->pred.scalars; + const auto& scalars = ec->pred.scalars(); for (float s : scalars) { values.append(s); @@ -492,7 +482,7 @@ py::list ex_get_scalars(example_ptr ec) py::list ex_get_action_scores(example_ptr ec) { py::list values; - auto const& scores = ec->pred.a_s; + auto const& scores = ec->pred.action_scores(); std::vector ordered_scores(scores.size()); for (auto const& action_score: scores) { @@ -510,7 +500,7 @@ py::list ex_get_action_scores(example_ptr ec) py::list ex_get_decision_scores(example_ptr ec) { py::list values; - 
for (auto const& scores : ec->pred.decision_scores) + for (auto const& scores : ec->pred.decision_scores()) { py::list inner_list; for (auto action_score: scores) @@ -526,7 +516,7 @@ py::list ex_get_decision_scores(example_ptr ec) py::list ex_get_multilabel_predictions(example_ptr ec) { py::list values; - MULTILABEL::labels labels = ec->pred.multilabels; + MULTILABEL::labels labels = ec->pred.multilabels(); for (uint32_t l : labels.label_v) { values.append(l); @@ -534,19 +524,19 @@ py::list ex_get_multilabel_predictions(example_ptr ec) return values; } -uint32_t ex_get_costsensitive_prediction(example_ptr ec) { return ec->pred.multiclass; } -uint32_t ex_get_costsensitive_num_costs(example_ptr ec) { return (uint32_t)ec->l.cs.costs.size(); } -float ex_get_costsensitive_cost(example_ptr ec, uint32_t i) { return ec->l.cs.costs[i].x; } -uint32_t ex_get_costsensitive_class(example_ptr ec, uint32_t i) { return ec->l.cs.costs[i].class_index; } -float ex_get_costsensitive_partial_prediction(example_ptr ec, uint32_t i) { return ec->l.cs.costs[i].partial_prediction; } -float ex_get_costsensitive_wap_value(example_ptr ec, uint32_t i) { return ec->l.cs.costs[i].wap_value; } +uint32_t ex_get_costsensitive_prediction(example_ptr ec) { return ec->pred.multiclass(); } +uint32_t ex_get_costsensitive_num_costs(example_ptr ec) { return (uint32_t)ec->l.cs().costs.size(); } +float ex_get_costsensitive_cost(example_ptr ec, uint32_t i) { return ec->l.cs().costs[i].x; } +uint32_t ex_get_costsensitive_class(example_ptr ec, uint32_t i) { return ec->l.cs().costs[i].class_index; } +float ex_get_costsensitive_partial_prediction(example_ptr ec, uint32_t i) { return ec->l.cs().costs[i].partial_prediction; } +float ex_get_costsensitive_wap_value(example_ptr ec, uint32_t i) { return ec->l.cs().costs[i].wap_value; } -uint32_t ex_get_cbandits_prediction(example_ptr ec) { return ec->pred.multiclass; } -uint32_t ex_get_cbandits_num_costs(example_ptr ec) { return (uint32_t)ec->l.cb.costs.size(); } -float ex_get_cbandits_cost(example_ptr ec, uint32_t i) { return ec->l.cb.costs[i].cost; } -uint32_t ex_get_cbandits_class(example_ptr ec, uint32_t i) { return ec->l.cb.costs[i].action; } -float ex_get_cbandits_probability(example_ptr ec, uint32_t i) { return ec->l.cb.costs[i].probability; } -float ex_get_cbandits_partial_prediction(example_ptr ec, uint32_t i) { return ec->l.cb.costs[i].partial_prediction; } +uint32_t ex_get_cbandits_prediction(example_ptr ec) { return ec->pred.multiclass(); } +uint32_t ex_get_cbandits_num_costs(example_ptr ec) { return (uint32_t)ec->l.cb().costs.size(); } +float ex_get_cbandits_cost(example_ptr ec, uint32_t i) { return ec->l.cb().costs[i].cost; } +uint32_t ex_get_cbandits_class(example_ptr ec, uint32_t i) { return ec->l.cb().costs[i].action; } +float ex_get_cbandits_probability(example_ptr ec, uint32_t i) { return ec->l.cb().costs[i].probability; } +float ex_get_cbandits_partial_prediction(example_ptr ec, uint32_t i) { return ec->l.cb().costs[i].partial_prediction; } // example_counter is being overriden by lableType! 
size_t get_example_counter(example_ptr ec) { return ec->example_counter; } diff --git a/test/unit_test/ccb_parser_test.cc b/test/unit_test/ccb_parser_test.cc index fff6f543de7..0a074dfdc67 100644 --- a/test/unit_test/ccb_parser_test.cc +++ b/test/unit_test/ccb_parser_test.cc @@ -8,112 +8,98 @@ #include #include "conditional_contextual_bandit.h" #include "parser.h" +#include "example.h" -void parse_label(label_parser& lp, parser* p, VW::string_view label, CCB::label& l) +void parse_label(label_parser& lp, parser* p, VW::string_view label, polylabel& l) { tokenize(' ', label, p->words); - lp.default_label(&l); - lp.parse_label(p, nullptr, &l, p->words); + lp.default_label(l); + lp.parse_label(p, nullptr, l, p->words); } BOOST_AUTO_TEST_CASE(ccb_parse_label) { auto lp = CCB::ccb_label_parser; parser p{8 /*ring_size*/, false /*strict parse*/}; - p.words = v_init(); - p.parse_name = v_init(); { - auto label = scoped_calloc_or_throw(); + auto label = scoped_calloc_or_throw(); parse_label(lp, &p, "ccb shared", *label); - BOOST_CHECK_EQUAL(label->explicit_included_actions.size(), 0); - BOOST_CHECK(label->outcome == nullptr); - BOOST_CHECK_EQUAL(label->type, CCB::example_type::shared); - lp.delete_label(label.get()); + BOOST_CHECK_EQUAL(label->ccb().explicit_included_actions.size(), 0); + BOOST_CHECK(label->ccb().outcome == nullptr); + BOOST_CHECK_EQUAL(label->ccb().type, CCB::example_type::shared); } { - auto label = scoped_calloc_or_throw(); + auto label = scoped_calloc_or_throw(); parse_label(lp, &p, "ccb action", *label.get()); - BOOST_CHECK_EQUAL(label->explicit_included_actions.size(), 0); - BOOST_CHECK(label->outcome == nullptr); - BOOST_CHECK_EQUAL(label->type, CCB::example_type::action); - lp.delete_label(label.get()); + BOOST_CHECK_EQUAL(label->ccb().explicit_included_actions.size(), 0); + BOOST_CHECK(label->ccb().outcome == nullptr); + BOOST_CHECK_EQUAL(label->ccb().type, CCB::example_type::action); } { - auto label = scoped_calloc_or_throw(); + auto label = scoped_calloc_or_throw(); parse_label(lp, &p, "ccb slot", *label.get()); - BOOST_CHECK_EQUAL(label->explicit_included_actions.size(), 0); - BOOST_CHECK(label->outcome == nullptr); - BOOST_CHECK_EQUAL(label->type, CCB::example_type::slot); - lp.delete_label(label.get()); + BOOST_CHECK_EQUAL(label->ccb().explicit_included_actions.size(), 0); + BOOST_CHECK(label->ccb().outcome == nullptr); + BOOST_CHECK_EQUAL(label->ccb().type, CCB::example_type::slot); } { - auto label = scoped_calloc_or_throw(); + auto label = scoped_calloc_or_throw(); parse_label(lp, &p, "ccb slot 1,3,4", *label.get()); - BOOST_CHECK_EQUAL(label->explicit_included_actions.size(), 3); - BOOST_CHECK_EQUAL(label->explicit_included_actions[0], 1); - BOOST_CHECK_EQUAL(label->explicit_included_actions[1], 3); - BOOST_CHECK_EQUAL(label->explicit_included_actions[2], 4); - BOOST_CHECK(label->outcome == nullptr); - BOOST_CHECK_EQUAL(label->type, CCB::example_type::slot); - lp.delete_label(label.get()); + BOOST_CHECK_EQUAL(label->ccb().explicit_included_actions.size(), 3); + BOOST_CHECK_EQUAL(label->ccb().explicit_included_actions[0], 1); + BOOST_CHECK_EQUAL(label->ccb().explicit_included_actions[1], 3); + BOOST_CHECK_EQUAL(label->ccb().explicit_included_actions[2], 4); + BOOST_CHECK(label->ccb().outcome == nullptr); + BOOST_CHECK_EQUAL(label->ccb().type, CCB::example_type::slot); } { - auto label = scoped_calloc_or_throw(); + auto label = scoped_calloc_or_throw(); parse_label(lp, &p, "ccb slot 1:1.0:0.5 3", *label.get()); - 
BOOST_CHECK_EQUAL(label->explicit_included_actions.size(), 1); - BOOST_CHECK_EQUAL(label->explicit_included_actions[0], 3); - BOOST_CHECK_CLOSE(label->outcome->cost, 1.0f, FLOAT_TOL); - BOOST_CHECK_EQUAL(label->outcome->probabilities.size(), 1); - BOOST_CHECK_EQUAL(label->outcome->probabilities[0].action, 1); - BOOST_CHECK_CLOSE(label->outcome->probabilities[0].score, .5f, FLOAT_TOL); - BOOST_CHECK_EQUAL(label->type, CCB::example_type::slot); - lp.delete_label(label.get()); + BOOST_CHECK_EQUAL(label->ccb().explicit_included_actions.size(), 1); + BOOST_CHECK_EQUAL(label->ccb().explicit_included_actions[0], 3); + BOOST_CHECK_CLOSE(label->ccb().outcome->cost, 1.0f, FLOAT_TOL); + BOOST_CHECK_EQUAL(label->ccb().outcome->probabilities.size(), 1); + BOOST_CHECK_EQUAL(label->ccb().outcome->probabilities[0].action, 1); + BOOST_CHECK_CLOSE(label->ccb().outcome->probabilities[0].score, .5f, FLOAT_TOL); + BOOST_CHECK_EQUAL(label->ccb().type, CCB::example_type::slot); } { - auto label = scoped_calloc_or_throw(); + auto label = scoped_calloc_or_throw(); parse_label(lp, &p, "ccb slot 1:-2.0:0.5,2:0.25,3:0.25 3,4", *label.get()); - BOOST_CHECK_EQUAL(label->explicit_included_actions.size(), 2); - BOOST_CHECK_EQUAL(label->explicit_included_actions[0], 3); - BOOST_CHECK_EQUAL(label->explicit_included_actions[1], 4); - BOOST_CHECK_CLOSE(label->outcome->cost, -2.0f, FLOAT_TOL); - BOOST_CHECK_EQUAL(label->outcome->probabilities.size(), 3); - BOOST_CHECK_EQUAL(label->outcome->probabilities[0].action, 1); - BOOST_CHECK_CLOSE(label->outcome->probabilities[0].score, .5f, FLOAT_TOL); - BOOST_CHECK_EQUAL(label->outcome->probabilities[1].action, 2); - BOOST_CHECK_CLOSE(label->outcome->probabilities[1].score, .25f, FLOAT_TOL); - BOOST_CHECK_EQUAL(label->outcome->probabilities[2].action, 3); - BOOST_CHECK_CLOSE(label->outcome->probabilities[2].score, .25f, FLOAT_TOL); - BOOST_CHECK_EQUAL(label->type, CCB::example_type::slot); - lp.delete_label(label.get()); + BOOST_CHECK_EQUAL(label->ccb().explicit_included_actions.size(), 2); + BOOST_CHECK_EQUAL(label->ccb().explicit_included_actions[0], 3); + BOOST_CHECK_EQUAL(label->ccb().explicit_included_actions[1], 4); + BOOST_CHECK_CLOSE(label->ccb().outcome->cost, -2.0f, FLOAT_TOL); + BOOST_CHECK_EQUAL(label->ccb().outcome->probabilities.size(), 3); + BOOST_CHECK_EQUAL(label->ccb().outcome->probabilities[0].action, 1); + BOOST_CHECK_CLOSE(label->ccb().outcome->probabilities[0].score, .5f, FLOAT_TOL); + BOOST_CHECK_EQUAL(label->ccb().outcome->probabilities[1].action, 2); + BOOST_CHECK_CLOSE(label->ccb().outcome->probabilities[1].score, .25f, FLOAT_TOL); + BOOST_CHECK_EQUAL(label->ccb().outcome->probabilities[2].action, 3); + BOOST_CHECK_CLOSE(label->ccb().outcome->probabilities[2].score, .25f, FLOAT_TOL); + BOOST_CHECK_EQUAL(label->ccb().type, CCB::example_type::slot); } { - auto label = scoped_calloc_or_throw(); + auto label = scoped_calloc_or_throw(); BOOST_REQUIRE_THROW(parse_label(lp, &p, "shared", *label.get()), VW::vw_exception); - lp.delete_label(label.get()); } { - auto label = scoped_calloc_or_throw(); + auto label = scoped_calloc_or_throw(); BOOST_REQUIRE_THROW(parse_label(lp, &p, "other shared", *label.get()), VW::vw_exception); - lp.delete_label(label.get()); } { - auto label = scoped_calloc_or_throw(); + auto label = scoped_calloc_or_throw(); BOOST_REQUIRE_THROW(parse_label(lp, &p, "other", *label.get()), VW::vw_exception); - lp.delete_label(label.get()); } { - auto label = scoped_calloc_or_throw(); + auto label = scoped_calloc_or_throw(); 
BOOST_REQUIRE_THROW(parse_label(lp, &p, "ccb unknown", *label.get()), VW::vw_exception); - lp.delete_label(label.get()); } { - auto label = scoped_calloc_or_throw(); + auto label = scoped_calloc_or_throw(); BOOST_REQUIRE_THROW(parse_label(lp, &p, "ccb slot 1:1.0:0.5,4:0.7", *label.get()), VW::vw_exception); - lp.delete_label(label.get()); } - p.words.delete_v(); - p.parse_name.delete_v(); } BOOST_AUTO_TEST_CASE(ccb_cache_label) @@ -122,67 +108,51 @@ BOOST_AUTO_TEST_CASE(ccb_cache_label) //io.init(); TODO: figure out and fix leak caused by double init() parser p{8 /*ring_size*/, false /*strict parse*/}; - p.words = v_init(); - p.parse_name = v_init(); auto lp = CCB::ccb_label_parser; - auto label = scoped_calloc_or_throw(); + auto label = scoped_calloc_or_throw(); parse_label(lp, &p, "ccb slot 1:-2.0:0.5,2:0.25,3:0.25 3,4", *label.get()); - lp.cache_label(label.get(), io); + lp.cache_label(*label.get(), io); io.space.end() = io.head; io.head = io.space.begin(); - auto uncached_label = scoped_calloc_or_throw(); - lp.default_label(uncached_label.get()); - lp.read_cached_label(nullptr, uncached_label.get(), io); - - BOOST_CHECK_EQUAL(uncached_label->explicit_included_actions.size(), 2); - BOOST_CHECK_EQUAL(uncached_label->explicit_included_actions[0], 3); - BOOST_CHECK_EQUAL(uncached_label->explicit_included_actions[1], 4); - BOOST_CHECK_CLOSE(uncached_label->outcome->cost, -2.0f, FLOAT_TOL); - BOOST_CHECK_EQUAL(uncached_label->outcome->probabilities.size(), 3); - BOOST_CHECK_EQUAL(uncached_label->outcome->probabilities[0].action, 1); - BOOST_CHECK_CLOSE(uncached_label->outcome->probabilities[0].score, .5f, FLOAT_TOL); - BOOST_CHECK_EQUAL(uncached_label->outcome->probabilities[1].action, 2); - BOOST_CHECK_CLOSE(uncached_label->outcome->probabilities[1].score, .25f, FLOAT_TOL); - BOOST_CHECK_EQUAL(uncached_label->outcome->probabilities[2].action, 3); - BOOST_CHECK_CLOSE(uncached_label->outcome->probabilities[2].score, .25f, FLOAT_TOL); - BOOST_CHECK_EQUAL(uncached_label->type, CCB::example_type::slot); - lp.delete_label(label.get()); - lp.delete_label(uncached_label.get()); - p.words.delete_v(); - p.parse_name.delete_v(); + auto uncached_label = scoped_calloc_or_throw(); + lp.read_cached_label(nullptr, *uncached_label.get(), io); + + BOOST_CHECK_EQUAL(uncached_label->ccb().explicit_included_actions.size(), 2); + BOOST_CHECK_EQUAL(uncached_label->ccb().explicit_included_actions[0], 3); + BOOST_CHECK_EQUAL(uncached_label->ccb().explicit_included_actions[1], 4); + BOOST_CHECK_CLOSE(uncached_label->ccb().outcome->cost, -2.0f, FLOAT_TOL); + BOOST_CHECK_EQUAL(uncached_label->ccb().outcome->probabilities.size(), 3); + BOOST_CHECK_EQUAL(uncached_label->ccb().outcome->probabilities[0].action, 1); + BOOST_CHECK_CLOSE(uncached_label->ccb().outcome->probabilities[0].score, .5f, FLOAT_TOL); + BOOST_CHECK_EQUAL(uncached_label->ccb().outcome->probabilities[1].action, 2); + BOOST_CHECK_CLOSE(uncached_label->ccb().outcome->probabilities[1].score, .25f, FLOAT_TOL); + BOOST_CHECK_EQUAL(uncached_label->ccb().outcome->probabilities[2].action, 3); + BOOST_CHECK_CLOSE(uncached_label->ccb().outcome->probabilities[2].score, .25f, FLOAT_TOL); + BOOST_CHECK_EQUAL(uncached_label->ccb().type, CCB::example_type::slot); } BOOST_AUTO_TEST_CASE(ccb_copy_label) { parser p{8 /*ring_size*/, false /*strict parse*/}; - p.words = v_init(); - p.parse_name = v_init(); auto lp = CCB::ccb_label_parser; - auto label = scoped_calloc_or_throw(); - parse_label(lp, &p, "ccb slot 1:-2.0:0.5,2:0.25,3:0.25 3,4", *label.get()); - - auto 
copied_to = scoped_calloc_or_throw(); - lp.default_label(copied_to.get()); - - lp.copy_label(copied_to.get(), label.get()); - - BOOST_CHECK_EQUAL(copied_to->explicit_included_actions.size(), 2); - BOOST_CHECK_EQUAL(copied_to->explicit_included_actions[0], 3); - BOOST_CHECK_EQUAL(copied_to->explicit_included_actions[1], 4); - BOOST_CHECK_CLOSE(copied_to->outcome->cost, -2.0f, FLOAT_TOL); - BOOST_CHECK_EQUAL(copied_to->outcome->probabilities.size(), 3); - BOOST_CHECK_EQUAL(copied_to->outcome->probabilities[0].action, 1); - BOOST_CHECK_CLOSE(copied_to->outcome->probabilities[0].score, .5f, FLOAT_TOL); - BOOST_CHECK_EQUAL(copied_to->outcome->probabilities[1].action, 2); - BOOST_CHECK_CLOSE(copied_to->outcome->probabilities[1].score, .25f, FLOAT_TOL); - BOOST_CHECK_EQUAL(copied_to->outcome->probabilities[2].action, 3); - BOOST_CHECK_CLOSE(copied_to->outcome->probabilities[2].score, .25f, FLOAT_TOL); - BOOST_CHECK_EQUAL(copied_to->type, CCB::example_type::slot); - lp.delete_label(label.get()); - lp.delete_label(copied_to.get()); - p.words.delete_v(); - p.parse_name.delete_v(); + polylabel label; + parse_label(lp, &p, "ccb slot 1:-2.0:0.5,2:0.25,3:0.25 3,4", label); + + polylabel copied_to = label; + + BOOST_CHECK_EQUAL(copied_to.ccb().explicit_included_actions.size(), 2); + BOOST_CHECK_EQUAL(copied_to.ccb().explicit_included_actions[0], 3); + BOOST_CHECK_EQUAL(copied_to.ccb().explicit_included_actions[1], 4); + BOOST_CHECK_CLOSE(copied_to.ccb().outcome->cost, -2.0f, FLOAT_TOL); + BOOST_CHECK_EQUAL(copied_to.ccb().outcome->probabilities.size(), 3); + BOOST_CHECK_EQUAL(copied_to.ccb().outcome->probabilities[0].action, 1); + BOOST_CHECK_CLOSE(copied_to.ccb().outcome->probabilities[0].score, .5f, FLOAT_TOL); + BOOST_CHECK_EQUAL(copied_to.ccb().outcome->probabilities[1].action, 2); + BOOST_CHECK_CLOSE(copied_to.ccb().outcome->probabilities[1].score, .25f, FLOAT_TOL); + BOOST_CHECK_EQUAL(copied_to.ccb().outcome->probabilities[2].action, 3); + BOOST_CHECK_CLOSE(copied_to.ccb().outcome->probabilities[2].score, .25f, FLOAT_TOL); + BOOST_CHECK_EQUAL(copied_to.ccb().type, CCB::example_type::slot); } diff --git a/test/unit_test/ccb_test.cc b/test/unit_test/ccb_test.cc index 0bd27e3aee0..82a2fd394ba 100644 --- a/test/unit_test/ccb_test.cc +++ b/test/unit_test/ccb_test.cc @@ -56,7 +56,7 @@ BOOST_AUTO_TEST_CASE(ccb_explicit_included_actions_no_overlap) vw.predict(examples); - auto& decision_scores = examples[0]->pred.decision_scores; + auto& decision_scores = examples[0]->pred.decision_scores(); BOOST_CHECK_EQUAL(decision_scores.size(), 3); BOOST_CHECK_EQUAL(decision_scores[0].size(), 1); diff --git a/test/unit_test/dsjson_parser_test.cc b/test/unit_test/dsjson_parser_test.cc index 1fb95b4bc03..0889c3aaa7d 100644 --- a/test/unit_test/dsjson_parser_test.cc +++ b/test/unit_test/dsjson_parser_test.cc @@ -11,7 +11,7 @@ multi_ex parse_dsjson(vw& all, std::string line) { - auto examples = v_init(); + v_array examples; examples.push_back(&VW::get_unused_example(&all)); DecisionServiceInteraction interaction; @@ -22,7 +22,6 @@ multi_ex parse_dsjson(vw& all, std::string line) for (size_t i = 0; i < examples.size(); ++i) { result.push_back(examples[i]); } - examples.delete_v(); return result; } @@ -96,18 +95,18 @@ BOOST_AUTO_TEST_CASE(parse_dsjson_cb) BOOST_CHECK_EQUAL(examples.size(), 4); // Shared example - BOOST_CHECK_EQUAL(examples[0]->l.cb.costs.size(), 1); - BOOST_CHECK_CLOSE(examples[0]->l.cb.costs[0].probability, -1.f, FLOAT_TOL); - BOOST_CHECK_CLOSE(examples[0]->l.cb.costs[0].cost, FLT_MAX, FLOAT_TOL); + 
BOOST_CHECK_EQUAL(examples[0]->l.cb().costs.size(), 1); + BOOST_CHECK_CLOSE(examples[0]->l.cb().costs[0].probability, -1.f, FLOAT_TOL); + BOOST_CHECK_CLOSE(examples[0]->l.cb().costs[0].cost, FLT_MAX, FLOAT_TOL); // Action examples - BOOST_CHECK_EQUAL(examples[1]->l.cb.costs.size(), 0); - BOOST_CHECK_EQUAL(examples[2]->l.cb.costs.size(), 1); - BOOST_CHECK_EQUAL(examples[3]->l.cb.costs.size(), 0); + BOOST_CHECK_EQUAL(examples[1]->l.cb().costs.size(), 0); + BOOST_CHECK_EQUAL(examples[2]->l.cb().costs.size(), 1); + BOOST_CHECK_EQUAL(examples[3]->l.cb().costs.size(), 0); - BOOST_CHECK_CLOSE(examples[2]->l.cb.costs[0].probability, 0.8166667, FLOAT_TOL); - BOOST_CHECK_CLOSE(examples[2]->l.cb.costs[0].cost, -1.0, FLOAT_TOL); - BOOST_CHECK_EQUAL(examples[2]->l.cb.costs[0].action, 2); + BOOST_CHECK_CLOSE(examples[2]->l.cb().costs[0].probability, 0.8166667, FLOAT_TOL); + BOOST_CHECK_CLOSE(examples[2]->l.cb().costs[0].cost, -1.0, FLOAT_TOL); + BOOST_CHECK_EQUAL(examples[2]->l.cb().costs[0].action, 2); VW::finish_example(*vw, examples); VW::finish(*vw); } @@ -167,13 +166,13 @@ BOOST_AUTO_TEST_CASE(parse_dsjson_ccb) auto examples = parse_dsjson(*vw, json_text); BOOST_CHECK_EQUAL(examples.size(), 5); - BOOST_CHECK_EQUAL(examples[0]->l.conditional_contextual_bandit.type, CCB::example_type::shared); - BOOST_CHECK_EQUAL(examples[1]->l.conditional_contextual_bandit.type, CCB::example_type::action); - BOOST_CHECK_EQUAL(examples[2]->l.conditional_contextual_bandit.type, CCB::example_type::action); - BOOST_CHECK_EQUAL(examples[3]->l.conditional_contextual_bandit.type, CCB::example_type::slot); - BOOST_CHECK_EQUAL(examples[4]->l.conditional_contextual_bandit.type, CCB::example_type::slot); + BOOST_CHECK_EQUAL(examples[0]->l.ccb().type, CCB::example_type::shared); + BOOST_CHECK_EQUAL(examples[1]->l.ccb().type, CCB::example_type::action); + BOOST_CHECK_EQUAL(examples[2]->l.ccb().type, CCB::example_type::action); + BOOST_CHECK_EQUAL(examples[3]->l.ccb().type, CCB::example_type::slot); + BOOST_CHECK_EQUAL(examples[4]->l.ccb().type, CCB::example_type::slot); - auto label1 = examples[3]->l.conditional_contextual_bandit; + auto& label1 = examples[3]->l.ccb(); BOOST_CHECK_EQUAL(label1.explicit_included_actions.size(), 2); BOOST_CHECK_EQUAL(label1.explicit_included_actions[0], 1); BOOST_CHECK_EQUAL(label1.explicit_included_actions[1], 2); @@ -182,7 +181,7 @@ BOOST_AUTO_TEST_CASE(parse_dsjson_ccb) BOOST_CHECK_EQUAL(label1.outcome->probabilities[0].action, 1); BOOST_CHECK_CLOSE(label1.outcome->probabilities[0].score, .25f, .0001f); - auto label2 = examples[4]->l.conditional_contextual_bandit; + auto& label2 = examples[4]->l.ccb(); BOOST_CHECK_EQUAL(label2.explicit_included_actions.size(), 0); BOOST_CHECK_CLOSE(label2.outcome->cost, 4.f, .0001f); BOOST_CHECK_EQUAL(label2.outcome->probabilities.size(), 2); @@ -261,13 +260,13 @@ BOOST_AUTO_TEST_CASE(parse_dsjson_cb_as_ccb) auto examples = parse_dsjson(*vw, json_text); BOOST_CHECK_EQUAL(examples.size(), 5); - BOOST_CHECK_EQUAL(examples[0]->l.conditional_contextual_bandit.type, CCB::example_type::shared); - BOOST_CHECK_EQUAL(examples[1]->l.conditional_contextual_bandit.type, CCB::example_type::action); - BOOST_CHECK_EQUAL(examples[2]->l.conditional_contextual_bandit.type, CCB::example_type::action); - BOOST_CHECK_EQUAL(examples[3]->l.conditional_contextual_bandit.type, CCB::example_type::action); - BOOST_CHECK_EQUAL(examples[4]->l.conditional_contextual_bandit.type, CCB::example_type::slot); + BOOST_CHECK_EQUAL(examples[0]->l.ccb().type, CCB::example_type::shared); + 
BOOST_CHECK_EQUAL(examples[1]->l.ccb().type, CCB::example_type::action); + BOOST_CHECK_EQUAL(examples[2]->l.ccb().type, CCB::example_type::action); + BOOST_CHECK_EQUAL(examples[3]->l.ccb().type, CCB::example_type::action); + BOOST_CHECK_EQUAL(examples[4]->l.ccb().type, CCB::example_type::slot); - auto label2 = examples[4]->l.conditional_contextual_bandit; + auto& label2 = examples[4]->l.ccb(); BOOST_CHECK_EQUAL(label2.explicit_included_actions.size(), 0); BOOST_CHECK_CLOSE(label2.outcome->cost, -1.f, .0001f); BOOST_CHECK_EQUAL(label2.outcome->probabilities.size(), 1); diff --git a/test/unit_test/json_parser_test.cc b/test/unit_test/json_parser_test.cc index 0a810f879e2..cb93da1329f 100644 --- a/test/unit_test/json_parser_test.cc +++ b/test/unit_test/json_parser_test.cc @@ -11,7 +11,7 @@ multi_ex parse_json(vw& all, std::string line) { - auto examples = v_init(); + v_array examples; examples.push_back(&VW::get_unused_example(&all)); VW::read_line_json( all, examples, (char*)line.c_str(), (VW::example_factory_t)&VW::get_unused_example, (void*)&all); @@ -20,7 +20,6 @@ multi_ex parse_json(vw& all, std::string line) for (size_t i = 0; i < examples.size(); ++i) { result.push_back(examples[i]); } - examples.delete_v(); return result; } @@ -42,7 +41,7 @@ BOOST_AUTO_TEST_CASE(parse_json_simple) auto examples = parse_json(*vw, json_text); BOOST_CHECK_EQUAL(examples.size(), 1); - BOOST_CHECK_CLOSE(examples[0]->l.simple.label, 1.f, FLOAT_TOL); + BOOST_CHECK_CLOSE(examples[0]->l.simple().label, 1.f, FLOAT_TOL); VW::finish_example(*vw, examples); VW::finish(*vw); } @@ -81,18 +80,18 @@ BOOST_AUTO_TEST_CASE(parse_json_cb) auto examples = parse_json(*vw, json_text); BOOST_CHECK_EQUAL(examples.size(), 4); - BOOST_CHECK_EQUAL(examples[0]->l.cb.costs.size(), 1); - BOOST_CHECK_CLOSE(examples[0]->l.cb.costs[0].probability, -1.f, FLOAT_TOL); - BOOST_CHECK_CLOSE(examples[0]->l.cb.costs[0].cost, FLT_MAX, FLOAT_TOL); + BOOST_CHECK_EQUAL(examples[0]->l.cb().costs.size(), 1); + BOOST_CHECK_CLOSE(examples[0]->l.cb().costs[0].probability, -1.f, FLOAT_TOL); + BOOST_CHECK_CLOSE(examples[0]->l.cb().costs[0].cost, FLT_MAX, FLOAT_TOL); // Action examples - BOOST_CHECK_EQUAL(examples[1]->l.cb.costs.size(), 1); - BOOST_CHECK_EQUAL(examples[2]->l.cb.costs.size(), 0); - BOOST_CHECK_EQUAL(examples[3]->l.cb.costs.size(), 0); + BOOST_CHECK_EQUAL(examples[1]->l.cb().costs.size(), 1); + BOOST_CHECK_EQUAL(examples[2]->l.cb().costs.size(), 0); + BOOST_CHECK_EQUAL(examples[3]->l.cb().costs.size(), 0); - BOOST_CHECK_CLOSE(examples[1]->l.cb.costs[0].probability, 0.5, FLOAT_TOL); - BOOST_CHECK_CLOSE(examples[1]->l.cb.costs[0].cost, 1.0, FLOAT_TOL); - BOOST_CHECK_EQUAL(examples[1]->l.cb.costs[0].action, 1); + BOOST_CHECK_CLOSE(examples[1]->l.cb().costs[0].probability, 0.5, FLOAT_TOL); + BOOST_CHECK_CLOSE(examples[1]->l.cb().costs[0].cost, 1.0, FLOAT_TOL); + BOOST_CHECK_EQUAL(examples[1]->l.cb().costs[0].action, 1); VW::finish_example(*vw, examples); VW::finish(*vw); } @@ -154,16 +153,16 @@ BOOST_AUTO_TEST_CASE(parse_json_ccb) auto examples = parse_json(*vw, json_text); BOOST_CHECK_EQUAL(examples.size(), 8); - BOOST_CHECK_EQUAL(examples[0]->l.conditional_contextual_bandit.type, CCB::example_type::shared); - BOOST_CHECK_EQUAL(examples[1]->l.conditional_contextual_bandit.type, CCB::example_type::action); - BOOST_CHECK_EQUAL(examples[2]->l.conditional_contextual_bandit.type, CCB::example_type::action); - BOOST_CHECK_EQUAL(examples[3]->l.conditional_contextual_bandit.type, CCB::example_type::action); - 
BOOST_CHECK_EQUAL(examples[4]->l.conditional_contextual_bandit.type, CCB::example_type::action); - BOOST_CHECK_EQUAL(examples[5]->l.conditional_contextual_bandit.type, CCB::example_type::slot); - BOOST_CHECK_EQUAL(examples[6]->l.conditional_contextual_bandit.type, CCB::example_type::slot); - BOOST_CHECK_EQUAL(examples[7]->l.conditional_contextual_bandit.type, CCB::example_type::slot); - - auto label1 = examples[5]->l.conditional_contextual_bandit; + BOOST_CHECK_EQUAL(examples[0]->l.ccb().type, CCB::example_type::shared); + BOOST_CHECK_EQUAL(examples[1]->l.ccb().type, CCB::example_type::action); + BOOST_CHECK_EQUAL(examples[2]->l.ccb().type, CCB::example_type::action); + BOOST_CHECK_EQUAL(examples[3]->l.ccb().type, CCB::example_type::action); + BOOST_CHECK_EQUAL(examples[4]->l.ccb().type, CCB::example_type::action); + BOOST_CHECK_EQUAL(examples[5]->l.ccb().type, CCB::example_type::slot); + BOOST_CHECK_EQUAL(examples[6]->l.ccb().type, CCB::example_type::slot); + BOOST_CHECK_EQUAL(examples[7]->l.ccb().type, CCB::example_type::slot); + + auto& label1 = examples[5]->l.ccb(); BOOST_CHECK_EQUAL(label1.explicit_included_actions.size(), 2); BOOST_CHECK_EQUAL(label1.explicit_included_actions[0], 1); BOOST_CHECK_EQUAL(label1.explicit_included_actions[1], 2); @@ -172,11 +171,11 @@ BOOST_AUTO_TEST_CASE(parse_json_ccb) BOOST_CHECK_EQUAL(label1.outcome->probabilities[0].action, 1); BOOST_CHECK_CLOSE(label1.outcome->probabilities[0].score, .25f, .0001f); - auto label2 = examples[6]->l.conditional_contextual_bandit; + auto& label2 = examples[6]->l.ccb(); BOOST_CHECK_EQUAL(label2.explicit_included_actions.size(), 0); BOOST_CHECK(label2.outcome == nullptr); - auto label3 = examples[7]->l.conditional_contextual_bandit; + auto& label3 = examples[7]->l.ccb(); BOOST_CHECK_EQUAL(label3.explicit_included_actions.size(), 0); BOOST_CHECK_CLOSE(label3.outcome->cost, 4.f, .0001f); BOOST_CHECK_EQUAL(label3.outcome->probabilities.size(), 2); @@ -222,13 +221,13 @@ BOOST_AUTO_TEST_CASE(parse_json_cb_as_ccb) auto examples = parse_json(*vw, json_text); BOOST_CHECK_EQUAL(examples.size(), 5); - BOOST_CHECK_EQUAL(examples[0]->l.conditional_contextual_bandit.type, CCB::example_type::shared); - BOOST_CHECK_EQUAL(examples[1]->l.conditional_contextual_bandit.type, CCB::example_type::action); - BOOST_CHECK_EQUAL(examples[2]->l.conditional_contextual_bandit.type, CCB::example_type::action); - BOOST_CHECK_EQUAL(examples[3]->l.conditional_contextual_bandit.type, CCB::example_type::action); - BOOST_CHECK_EQUAL(examples[4]->l.conditional_contextual_bandit.type, CCB::example_type::slot); + BOOST_CHECK_EQUAL(examples[0]->l.ccb().type, CCB::example_type::shared); + BOOST_CHECK_EQUAL(examples[1]->l.ccb().type, CCB::example_type::action); + BOOST_CHECK_EQUAL(examples[2]->l.ccb().type, CCB::example_type::action); + BOOST_CHECK_EQUAL(examples[3]->l.ccb().type, CCB::example_type::action); + BOOST_CHECK_EQUAL(examples[4]->l.ccb().type, CCB::example_type::slot); - auto label1 = examples[4]->l.conditional_contextual_bandit; + auto& label1 = examples[4]->l.ccb(); BOOST_CHECK_EQUAL(label1.explicit_included_actions.size(), 0); BOOST_CHECK_CLOSE(label1.outcome->cost, 1.f, .0001f); BOOST_CHECK_EQUAL(label1.outcome->probabilities.size(), 1); diff --git a/test/unit_test/prediction_test.cc b/test/unit_test/prediction_test.cc index 1a58fda3a29..d81e08c802b 100644 --- a/test/unit_test/prediction_test.cc +++ b/test/unit_test/prediction_test.cc @@ -20,7 +20,7 @@ BOOST_AUTO_TEST_CASE(predict_modifying_state) vw.learn(learn_example); 
vw.finish_example(learn_example); vw.predict(predict_example); - prediction_one = predict_example.pred.scalar; + prediction_one = predict_example.pred.scalar(); vw.finish_example(predict_example); VW::finish(vw); } @@ -35,7 +35,7 @@ BOOST_AUTO_TEST_CASE(predict_modifying_state) vw.learn(learn_example); vw.finish_example(learn_example); vw.predict(predict_example); - prediction_two = predict_example.pred.scalar; + prediction_two = predict_example.pred.scalar(); vw.finish_example(predict_example); VW::finish(vw); } diff --git a/vowpalwabbit/CMakeLists.txt b/vowpalwabbit/CMakeLists.txt index 6ebbc844138..bf083af75af 100644 --- a/vowpalwabbit/CMakeLists.txt +++ b/vowpalwabbit/CMakeLists.txt @@ -48,7 +48,7 @@ set(vw_all_sources ccb_label.cc classweight.cc comp_io.cc conditional_contextual_bandit.cc confidence.cc cost_sensitive.cc cs_active.cc csoaa.cc distributionally_robust.cc ect.cc example.cc explore_eval.cc ftrl.cc gd_mf.cc gd.cc gen_cs_example.cc global_data.cc interact.cc interactions.cc io_buf.cc kernel_svm.cc - label_dictionary.cc lda_core.cc learner.cc log_multi.cc loss_functions.cc lrq.cc lrqfa.cc + label_dictionary.cc label_parser.cc lda_core.cc learner.cc log_multi.cc loss_functions.cc lrq.cc lrqfa.cc marginal.cc memory_tree.cc mf.cc multiclass.cc multilabel_oaa.cc multilabel.cc mwt.cc network.cc nn.cc no_label.cc noop.cc oaa.cc OjaNewton.cc options_boost_po.cc options_serializer_boost_po.cc parse_args.cc parse_example.cc parse_primitives.cc parse_regressor.cc parser.cc print.cc rand48.cc diff --git a/vowpalwabbit/OjaNewton.cc b/vowpalwabbit/OjaNewton.cc index 9f5886c297a..36232f9caa5 100644 --- a/vowpalwabbit/OjaNewton.cc +++ b/vowpalwabbit/OjaNewton.cc @@ -47,7 +47,7 @@ struct OjaNewton float* vv; float* tmp; - example** buffer; + std::vector buffer; float* weight_buffer; struct update_data data; @@ -345,7 +345,6 @@ struct OjaNewton free(ev); free(b); free(D); - free(buffer); free(weight_buffer); free(zv); free(vv); @@ -368,8 +367,19 @@ struct OjaNewton } }; -void keep_example(vw& all, OjaNewton& /* ON */, example& ec) { output_and_account_example(all, ec); } +void keep_example_but_delete_after_epoch_processed(vw& all, OjaNewton& ON, example& ec) +{ + output_and_account_example(all, ec); + if (ON.cnt == ON.epoch_size) + { + ON.cnt = 0; + for (auto example_ptr : ON.buffer) + { + VW::finish_example(*ON.all, *example_ptr); + } + } +} void make_pred(update_data& data, float x, float& wref) { int m = data.ON->m; @@ -392,7 +402,7 @@ void predict(OjaNewton& ON, base_learner&, example& ec) ON.data.prediction = 0; GD::foreach_feature(*ON.all, ec, ON.data); ec.partial_prediction = (float)ON.data.prediction; - ec.pred.scalar = GD::finalize_prediction(ON.all->sd, ec.partial_prediction); + ec.pred.scalar() = GD::finalize_prediction(ON.all->sd, ec.partial_prediction); } void update_Z_and_wbar(update_data& data, float x, float& wref) @@ -454,7 +464,7 @@ void learn(OjaNewton& ON, base_learner& base, example& ec) predict(ON, base, ec); update_data& data = ON.data; - data.g = ON.all->loss->first_derivative(ON.all->sd, ec.pred.scalar, ec.l.simple.label) * ec.l.simple.weight; + data.g = ON.all->loss->first_derivative(ON.all->sd, ec.pred.scalar(), ec.l.simple().label) * ec.l.simple().weight; data.g /= 2; // for half square loss if (ON.normalize) @@ -493,15 +503,6 @@ void learn(OjaNewton& ON, base_learner& base, example& ec) ON.update_b(); ON.check(); - - if (ON.cnt == ON.epoch_size) - { - ON.cnt = 0; - for (int k = 0; k < ON.epoch_size; k++) - { - VW::finish_example(*ON.all, *ON.buffer[k]); - } - } } 
void save_load(OjaNewton& ON, io_buf& model_file, bool read, bool text) @@ -582,7 +583,7 @@ base_learner* OjaNewton_setup(options_i& options, vw& all) ON->D[i] = 1; } - ON->buffer = calloc_or_throw(ON->epoch_size); + ON->buffer.resize(ON->epoch_size, nullptr); ON->weight_buffer = calloc_or_throw(ON->epoch_size); ON->zv = calloc_or_throw(ON->m + 1); @@ -598,6 +599,7 @@ base_learner* OjaNewton_setup(options_i& options, vw& all) learner& l = init_learner(ON, learn, predict, all.weights.stride()); l.set_save_load(save_load); - l.set_finish_example(keep_example); + l.set_finish_example(keep_example_but_delete_after_epoch_processed); + l.label_type = label_type_t::simple; return make_base(l); } diff --git a/vowpalwabbit/action_score.cc b/vowpalwabbit/action_score.cc index 16121b6a8dc..06cd70d5924 100644 --- a/vowpalwabbit/action_score.cc +++ b/vowpalwabbit/action_score.cc @@ -11,7 +11,7 @@ namespace ACTION_SCORE { -void print_action_score(int f, v_array& a_s, v_array& tag) +void print_action_score(int f, const v_array& a_s, const v_array& tag) { if (f >= 0) { @@ -31,11 +31,4 @@ void print_action_score(int f, v_array& a_s, v_array& tag) std::cerr << "write error: " << strerror(errno) << std::endl; } } - -void delete_action_scores(void* v) -{ - v_array* cs = (v_array*)v; - cs->delete_v(); -} - } // namespace ACTION_SCORE diff --git a/vowpalwabbit/action_score.h b/vowpalwabbit/action_score.h index 29a6b010fda..8e052214d25 100644 --- a/vowpalwabbit/action_score.h +++ b/vowpalwabbit/action_score.h @@ -78,7 +78,6 @@ inline int score_comp(const void* p1, const void* p2) inline int reverse_order(const void* p1, const void* p2) { return score_comp(p2, p1); } -void print_action_score(int f, v_array& a_s, v_array&); +void print_action_score(int f, const v_array& a_s, const v_array&); -void delete_action_scores(void* v); } // namespace ACTION_SCORE diff --git a/vowpalwabbit/active.cc b/vowpalwabbit/active.cc index 5817ea659d6..ddee95e5c4e 100644 --- a/vowpalwabbit/active.cc +++ b/vowpalwabbit/active.cc @@ -57,7 +57,7 @@ void predict_or_learn_simulation(active& a, single_learner& base, example& ec) float k = (float)all.sd->t; float threshold = 0.f; - ec.confidence = fabsf(ec.pred.scalar - threshold) / base.sensitivity(ec); + ec.confidence = fabsf(ec.pred.scalar() - threshold) / base.sensitivity(ec); float importance = query_decision(a, ec.confidence, k); if (importance > 0) @@ -68,7 +68,7 @@ void predict_or_learn_simulation(active& a, single_learner& base, example& ec) } else { - ec.l.simple.label = FLT_MAX; + ec.l.simple().label = FLT_MAX; ec.weight = 0.f; } } @@ -82,10 +82,10 @@ void predict_or_learn_active(active& a, single_learner& base, example& ec) else base.predict(ec); - if (ec.l.simple.label == FLT_MAX) + if (ec.l.simple().label == FLT_MAX) { float threshold = (a.all->sd->max_label + a.all->sd->min_label) * 0.5f; - ec.confidence = fabsf(ec.pred.scalar - threshold) / base.sensitivity(ec); + ec.confidence = fabsf(ec.pred.scalar() - threshold) / base.sensitivity(ec); } } @@ -109,7 +109,7 @@ void active_print_result(int f, float res, float weight, v_array tag) void output_and_account_example(vw& all, active& a, example& ec) { - label_data& ld = ec.l.simple; + label_data& ld = ec.l.simple(); all.sd->update(ec.test_only, ld.label != FLT_MAX, ec.loss, ec.weight, ec.num_features); if (ld.label != FLT_MAX && !ec.test_only) @@ -123,7 +123,7 @@ void output_and_account_example(vw& all, active& a, example& ec) all.print_by_ref(all.raw_prediction, ec.partial_prediction, -1, ec.tag); for (auto i : 
all.final_prediction_sink) { - active_print_result(i, ec.pred.scalar, ai, ec.tag); + active_print_result(i, ec.pred.scalar(), ai, ec.tag); } print_update(all, ec); @@ -171,5 +171,7 @@ base_learner* active_setup(options_i& options, vw& all) l->set_finish_example(return_active_example); } + l->label_type = label_type_t::simple; + return make_base(*l); } diff --git a/vowpalwabbit/active_cover.cc b/vowpalwabbit/active_cover.cc index b9de29eada6..056e5151d13 100644 --- a/vowpalwabbit/active_cover.cc +++ b/vowpalwabbit/active_cover.cc @@ -53,7 +53,7 @@ bool dis_test(vw& all, example& ec, single_learner& base, float /* prediction */ // Get loss difference float middle = 0.f; - ec.confidence = fabsf(ec.pred.scalar - middle) / base.sensitivity(ec); + ec.confidence = fabsf(ec.pred.scalar() - middle) / base.sensitivity(ec); float k = (float)all.sd->t; float loss_delta = ec.confidence / k; @@ -112,7 +112,7 @@ float query_decision(active_cover& a, single_learner& l, example& ec, float pred for (size_t i = 0; i < a.cover_size; i++) { l.predict(ec, i + 1); - q2 += ((float)(sign(ec.pred.scalar) != sign(prediction))) * (a.lambda_n[i] / a.lambda_d[i]); + q2 += ((float)(sign(ec.pred.scalar()) != sign(prediction))) * (a.lambda_n[i] / a.lambda_d[i]); } p = std::sqrt(q2) / (1 + std::sqrt(q2)); @@ -141,10 +141,10 @@ void predict_or_learn_active_cover(active_cover& a, single_learner& base, exampl { vw& all = *a.all; - float prediction = ec.pred.scalar; + float prediction = ec.pred.scalar(); float t = (float)a.all->sd->t; float ec_input_weight = ec.weight; - float ec_input_label = ec.l.simple.label; + float ec_input_label = ec.l.simple().label; // Compute threshold defining allowed set A float threshold = get_threshold((float)all.sd->sum_loss, t, a.active_c0, a.alpha); @@ -155,7 +155,7 @@ void predict_or_learn_active_cover(active_cover& a, single_learner& base, exampl // Query (or not) if (!in_dis) // Use predicted label { - ec.l.simple.label = sign(prediction); + ec.l.simple().label = sign(prediction); ec.weight = ec_input_weight; base.learn(ec, 0); } @@ -163,21 +163,21 @@ void predict_or_learn_active_cover(active_cover& a, single_learner& base, exampl { all.sd->queries += 1; ec.weight = ec_input_weight * importance; - ec.l.simple.label = ec_input_label; + ec.l.simple().label = ec_input_label; base.learn(ec, 0); } else // skipped example { // Make sure the loss computation does not include // skipped examples - ec.l.simple.label = FLT_MAX; + ec.l.simple().label = FLT_MAX; ec.weight = 0; } // Update the learners in the cover and their weights float q2 = 4.f * pmin * pmin; float p, s, cost, cost_delta = 0; - float ec_output_label = ec.l.simple.label; + float ec_output_label = ec.l.simple().label; float ec_output_weight = ec.weight; float r = 2.f * threshold * t * a.alpha / a.active_c0 / a.beta_scale; @@ -206,7 +206,7 @@ void predict_or_learn_active_cover(active_cover& a, single_learner& base, exampl // Choose min-cost label as the label // Set importance weight to be the cost difference - ec.l.simple.label = -1.f * sign(cost_delta) * sign(prediction); + ec.l.simple().label = -1.f * sign(cost_delta) * sign(prediction); ec.weight = ec_input_weight * fabs(cost_delta); // Update learner @@ -214,20 +214,20 @@ void predict_or_learn_active_cover(active_cover& a, single_learner& base, exampl base.predict(ec, i + 1); // Update numerator of lambda - a.lambda_n[i] += 2.f * ((float)(sign(ec.pred.scalar) != sign(prediction))) * cost_delta; + a.lambda_n[i] += 2.f * ((float)(sign(ec.pred.scalar()) != sign(prediction))) * 
cost_delta; a.lambda_n[i] = fmax(a.lambda_n[i], 0.f); // Update denominator of lambda - a.lambda_d[i] += ((float)(sign(ec.pred.scalar) != sign(prediction) && in_dis)) / (float)pow(q2, 1.5); + a.lambda_d[i] += ((float)(sign(ec.pred.scalar()) != sign(prediction) && in_dis)) / (float)pow(q2, 1.5); // Accumulating weights of learners in the cover - q2 += ((float)(sign(ec.pred.scalar) != sign(prediction))) * (a.lambda_n[i] / a.lambda_d[i]); + q2 += ((float)(sign(ec.pred.scalar()) != sign(prediction))) * (a.lambda_n[i] / a.lambda_d[i]); } // Restoring the weight, the label, and the prediction ec.weight = ec_output_weight; - ec.l.simple.label = ec_output_label; - ec.pred.scalar = prediction; + ec.l.simple().label = ec_output_label; + ec.pred.scalar() = prediction; } } @@ -281,6 +281,6 @@ base_learner* active_cover_setup(options_i& options, vw& all) // Create new learner learner& l = init_learner( data, base, predict_or_learn_active_cover, predict_or_learn_active_cover, data->cover_size + 1); - + l.label_type = label_type_t::simple; return make_base(l); } diff --git a/vowpalwabbit/audit_regressor.cc b/vowpalwabbit/audit_regressor.cc index 3478c7d63d8..905693ad507 100644 --- a/vowpalwabbit/audit_regressor.cc +++ b/vowpalwabbit/audit_regressor.cc @@ -269,12 +269,14 @@ LEARNER::base_learner* audit_regressor_setup(options_i& options, vw& all) dat->out_file = new io_buf(); dat->out_file->open_file(out_file.c_str(), all.stdin_off, io_buf::WRITE); + auto base = as_singleline(setup_base(options, all)); LEARNER::learner& ret = - LEARNER::init_learner(dat, as_singleline(setup_base(options, all)), audit_regressor, audit_regressor, 1); + LEARNER::init_learner(dat, base, audit_regressor, audit_regressor, 1); ret.set_end_examples(end_examples); ret.set_finish_example(finish_example); ret.set_finish(finish); ret.set_init_driver(init_driver); + ret.label_type = base->label_type; return LEARNER::make_base(ret); } diff --git a/vowpalwabbit/autolink.cc b/vowpalwabbit/autolink.cc index 988c34933dd..1b88c546e29 100644 --- a/vowpalwabbit/autolink.cc +++ b/vowpalwabbit/autolink.cc @@ -53,7 +53,7 @@ void VW::autolink::learn(LEARNER::single_learner& base, example& ec) void VW::autolink::prepare_example(LEARNER::single_learner& base, example& ec) { base.predict(ec); - float base_pred = ec.pred.scalar; + float base_pred = ec.pred.scalar(); // Add features of label. ec.indices.push_back(autolink_namespace); @@ -63,7 +63,7 @@ void VW::autolink::prepare_example(LEARNER::single_learner& base, example& ec) if (base_pred != 0.) 
{ fs.push_back(base_pred, AUTOCONSTANT + (i << _stride_shift)); - base_pred *= ec.pred.scalar; + base_pred *= ec.pred.scalar(); } } ec.total_sum_feat_sq += fs.sum_feat_sq; @@ -97,6 +97,9 @@ LEARNER::base_learner* autolink_setup(options_i& options, vw& all) return nullptr; auto autolink_reduction = scoped_calloc_or_throw(d, all.weights.stride_shift()); - return make_base(init_learner( - autolink_reduction, as_singleline(setup_base(options, all)), predict_or_learn, predict_or_learn)); + auto base = as_singleline(setup_base(options, all)); + auto learner = make_base(init_learner( + autolink_reduction, base, predict_or_learn, predict_or_learn)); + learner->label_type = base->label_type; + return learner; } diff --git a/vowpalwabbit/baseline.cc b/vowpalwabbit/baseline.cc index 6c51d4fb864..01700d55bc0 100644 --- a/vowpalwabbit/baseline.cc +++ b/vowpalwabbit/baseline.cc @@ -72,7 +72,7 @@ struct baseline ~baseline() { if (ec) - VW::dealloc_example(simple_label.delete_label, *ec); + ec->~example(); free(ec); } }; @@ -113,7 +113,7 @@ void predict_or_learn(baseline& data, single_learner& base, example& ec) } VW::copy_example_metadata(/*audit=*/false, data.ec, &ec); base.predict(*data.ec); - ec.l.simple.initial = data.ec->pred.scalar; + ec.l.simple().initial = data.ec->pred.scalar(); base.predict(ec); } else @@ -121,10 +121,10 @@ void predict_or_learn(baseline& data, single_learner& base, example& ec) if (is_learn) { - const float pred = ec.pred.scalar; // save 'safe' prediction + const float pred = ec.pred.scalar(); // save 'safe' prediction // now learn - data.ec->l.simple = ec.l.simple; + data.ec->l.simple() = ec.l.simple(); if (!data.global_only) { // move label & constant features data over to baseline example @@ -150,7 +150,7 @@ void predict_or_learn(baseline& data, single_learner& base, example& ec) base.learn(*data.ec); // regress residual - ec.l.simple.initial = data.ec->pred.scalar; + ec.l.simple().initial = data.ec->pred.scalar(); base.learn(ec); if (!data.global_only) @@ -160,7 +160,7 @@ void predict_or_learn(baseline& data, single_learner& base, example& ec) } // return the safe prediction - ec.pred.scalar = pred; + ec.pred.scalar() = pred; } } @@ -175,15 +175,15 @@ float sensitivity(baseline& data, base_learner& base, example& ec) // sensitivity of baseline term VW::copy_example_metadata(/*audit=*/false, data.ec, &ec); - data.ec->l.simple.label = ec.l.simple.label; - data.ec->pred.scalar = ec.pred.scalar; + data.ec->l.simple().label = ec.l.simple().label; + data.ec->pred.scalar() = ec.pred.scalar(); // std::cout << "before base" << std::endl; const float baseline_sens = base.sensitivity(*data.ec); // std::cout << "base sens: " << baseline_sens << std::endl; // sensitivity of residual as_singleline(&base)->predict(*data.ec); - ec.l.simple.initial = data.ec->pred.scalar; + ec.l.simple().initial = data.ec->pred.scalar(); const float sens = base.sensitivity(ec); // std::cout << " residual sens: " << sens << std::endl; return baseline_sens + sens; @@ -213,7 +213,7 @@ base_learner* baseline_setup(options_i& options, vw& all) return nullptr; // initialize baseline example - data->ec = VW::alloc_examples(simple_label.label_size, 1); + data->ec = VW::alloc_examples(1); data->ec->interactions = &all.interactions; data->all = &all; @@ -227,6 +227,6 @@ base_learner* baseline_setup(options_i& options, vw& all) learner& l = init_learner(data, base, predict_or_learn, predict_or_learn); l.set_sensitivity(sensitivity); - + l.label_type = label_type_t::simple; return make_base(l); } diff --git 
a/vowpalwabbit/best_constant.cc b/vowpalwabbit/best_constant.cc index 7b1216f3928..225d418acf5 100644 --- a/vowpalwabbit/best_constant.cc +++ b/vowpalwabbit/best_constant.cc @@ -33,7 +33,7 @@ bool get_best_constant(vw& all, float& best_constant, float& best_constant_loss) else return false; - if ((label1_cnt + label2_cnt) <= 0.) + if ((label1_cnt + label2_cnt) <= 0.f) return false; auto funcName = all.loss->getType(); diff --git a/vowpalwabbit/bfgs.cc b/vowpalwabbit/bfgs.cc index 354f0caa200..8efaebc449d 100644 --- a/vowpalwabbit/bfgs.cc +++ b/vowpalwabbit/bfgs.cc @@ -105,7 +105,6 @@ struct bfgs ~bfgs() { - predictions.delete_v(); free(mem); free(rho); free(alpha); @@ -143,7 +142,7 @@ void reset_state(vw& all, bfgs& b, bool zero) // w[2] = step direction // w[3] = preconditioner -constexpr bool test_example(example& ec) noexcept { return ec.l.simple.label == FLT_MAX; } +bool test_example(example& ec) noexcept { return ec.l.simple().label == FLT_MAX; } float bfgs_predict(vw& all, example& ec) { @@ -156,7 +155,7 @@ inline void add_grad(float& d, float f, float& fw) { (&fw)[W_GT] += d * f; } float predict_and_gradient(vw& all, example& ec) { float fp = bfgs_predict(all, ec); - label_data& ld = ec.l.simple; + label_data& ld = ec.l.simple(); all.set_minmax(all.sd, ld.label); float loss_grad = all.loss->first_derivative(all.sd, fp, ld.label) * ec.weight; @@ -169,7 +168,7 @@ inline void add_precond(float& d, float f, float& fw) { (&fw)[W_COND] += d * f * void update_preconditioner(vw& all, example& ec) { - float curvature = all.loss->second_derivative(all.sd, ec.pred.scalar, ec.l.simple.label) * ec.weight; + float curvature = all.loss->second_derivative(all.sd, ec.pred.scalar(), ec.l.simple().label) * ec.weight; GD::foreach_feature(all, ec, curvature); } @@ -177,7 +176,7 @@ inline void add_DIR(float& p, const float fx, float& fw) { p += (&fw)[W_DIR] * f float dot_with_direction(vw& all, example& ec) { - float temp = ec.l.simple.initial; + float temp = ec.l.simple().initial; GD::foreach_feature(all, ec, temp); return temp; } @@ -859,7 +858,7 @@ int process_pass(vw& all, bfgs& b) void process_example(vw& all, bfgs& b, example& ec) { - label_data& ld = ec.l.simple; + label_data& ld = ec.l.simple(); if (b.first_pass) b.importance_weight_sum += ec.weight; @@ -868,10 +867,10 @@ void process_example(vw& all, bfgs& b, example& ec) /********************************************************************/ if (b.gradient_pass) { - ec.pred.scalar = predict_and_gradient(all, ec); // w[0] & w[1] - ec.loss = all.loss->getLoss(all.sd, ec.pred.scalar, ld.label) * ec.weight; + ec.pred.scalar() = predict_and_gradient(all, ec); // w[0] & w[1] + ec.loss = all.loss->getLoss(all.sd, ec.pred.scalar(), ld.label) * ec.weight; b.loss_sum += ec.loss; - b.predictions.push_back(ec.pred.scalar); + b.predictions.push_back(ec.pred.scalar()); } /********************************************************************/ /* II) CURVATURE CALCULATION ****************************************/ @@ -881,13 +880,13 @@ void process_example(vw& all, bfgs& b, example& ec) float d_dot_x = dot_with_direction(all, ec); // w[2] if (b.example_number >= b.predictions.size()) // Make things safe in case example source is strange. 
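// (i.e. clamp the index so the predictions[] lookups below stay in bounds)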
b.example_number = b.predictions.size() - 1; - ec.pred.scalar = b.predictions[b.example_number]; + ec.pred.scalar() = b.predictions[b.example_number]; ec.partial_prediction = b.predictions[b.example_number]; - ec.loss = all.loss->getLoss(all.sd, ec.pred.scalar, ld.label) * ec.weight; + ec.loss = all.loss->getLoss(all.sd, ec.pred.scalar(), ld.label) * ec.weight; float sd = all.loss->second_derivative(all.sd, b.predictions[b.example_number++], ld.label); b.curvature += ((double)d_dot_x) * d_dot_x * sd * ec.weight; } - ec.updated_prediction = ec.pred.scalar; + ec.updated_prediction = ec.pred.scalar(); if (b.preconditioner_pass) update_preconditioner(all, ec); // w[3] @@ -955,7 +954,7 @@ template void predict(bfgs& b, base_learner&, example& ec) { vw* all = b.all; - ec.pred.scalar = bfgs_predict(*all, ec); + ec.pred.scalar() = bfgs_predict(*all, ec); if (audit) GD::print_audit_features(*(b.all), ec); } @@ -1166,6 +1165,7 @@ base_learner* bfgs_setup(options_i& options, vw& all) l->set_save_load(save_load); l->set_init_driver(init_driver); l->set_end_pass(end_pass); + l->label_type = label_type_t::simple; return make_base(*l); } diff --git a/vowpalwabbit/binary.cc b/vowpalwabbit/binary.cc index c6441509c8c..f3fcbc83a90 100644 --- a/vowpalwabbit/binary.cc +++ b/vowpalwabbit/binary.cc @@ -15,16 +15,16 @@ void predict_or_learn(char&, LEARNER::single_learner& base, example& ec) else base.predict(ec); - if (ec.pred.scalar > 0) - ec.pred.scalar = 1; + if (ec.pred.scalar() > 0) + ec.pred.scalar() = 1; else - ec.pred.scalar = -1; + ec.pred.scalar() = -1; - if (ec.l.simple.label != FLT_MAX) + if (ec.l.simple().label != FLT_MAX) { - if (fabs(ec.l.simple.label) != 1.f) - std::cout << "You are using label " << ec.l.simple.label << " not -1 or 1 as loss function expects!" << std::endl; - else if (ec.l.simple.label == ec.pred.scalar) + if (fabs(ec.l.simple().label) != 1.f) + std::cout << "You are using label " << ec.l.simple().label << " not -1 or 1 as loss function expects!" 
<< std::endl; + else if (ec.l.simple().label == ec.pred.scalar()) ec.loss = 0.; else ec.loss = ec.weight; @@ -43,5 +43,6 @@ LEARNER::base_learner* binary_setup(options_i& options, vw& all) LEARNER::learner& ret = LEARNER::init_learner(as_singleline(setup_base(options, all)), predict_or_learn, predict_or_learn); + ret.label_type = label_type_t::simple; return make_base(ret); } diff --git a/vowpalwabbit/boosting.cc b/vowpalwabbit/boosting.cc index 17a33f6e9db..0f848254711 100644 --- a/vowpalwabbit/boosting.cc +++ b/vowpalwabbit/boosting.cc @@ -74,7 +74,7 @@ struct boosting template void predict_or_learn(boosting& o, LEARNER::single_learner& base, example& ec) { - label_data& ld = ec.l.simple; + label_data& ld = ec.l.simple(); float final_prediction = 0; @@ -112,25 +112,25 @@ void predict_or_learn(boosting& o, LEARNER::single_learner& base, example& ec) base.predict(ec, i); - // ec.pred.scalar is now the i-th learner prediction on this example - s += ld.label * ec.pred.scalar; + // ec.pred.scalar() is now the i-th learner prediction on this example + s += ld.label * ec.pred.scalar(); - final_prediction += ec.pred.scalar; + final_prediction += ec.pred.scalar(); base.learn(ec, i); } else { base.predict(ec, i); - final_prediction += ec.pred.scalar; + final_prediction += ec.pred.scalar(); } } ec.weight = u; ec.partial_prediction = final_prediction; - ec.pred.scalar = sign(final_prediction); + ec.pred.scalar() = sign(final_prediction); - if (ld.label == ec.pred.scalar) + if (ld.label == ec.pred.scalar()) ec.loss = 0.; else ec.loss = ec.weight; @@ -142,7 +142,7 @@ void predict_or_learn(boosting& o, LEARNER::single_learner& base, example& ec) template void predict_or_learn_logistic(boosting& o, LEARNER::single_learner& base, example& ec) { - label_data& ld = ec.l.simple; + label_data& ld = ec.l.simple(); float final_prediction = 0; @@ -163,13 +163,13 @@ void predict_or_learn_logistic(boosting& o, LEARNER::single_learner& base, examp base.predict(ec, i); float z; - z = ld.label * ec.pred.scalar; + z = ld.label * ec.pred.scalar(); s += z * o.alpha[i]; - // if ld.label * ec.pred.scalar < 0, learner i made a mistake + // if ld.label * ec.pred.scalar() < 0, learner i made a mistake - final_prediction += ec.pred.scalar * o.alpha[i]; + final_prediction += ec.pred.scalar() * o.alpha[i]; // update alpha o.alpha[i] += eta * z / (1 + correctedExp(s)); @@ -183,15 +183,15 @@ void predict_or_learn_logistic(boosting& o, LEARNER::single_learner& base, examp else { base.predict(ec, i); - final_prediction += ec.pred.scalar * o.alpha[i]; + final_prediction += ec.pred.scalar() * o.alpha[i]; } } ec.weight = u; ec.partial_prediction = final_prediction; - ec.pred.scalar = sign(final_prediction); + ec.pred.scalar() = sign(final_prediction); - if (ld.label == ec.pred.scalar) + if (ld.label == ec.pred.scalar()) ec.loss = 0.; else ec.loss = ec.weight; @@ -200,7 +200,7 @@ void predict_or_learn_logistic(boosting& o, LEARNER::single_learner& base, examp template void predict_or_learn_adaptive(boosting& o, LEARNER::single_learner& base, example& ec) { - label_data& ld = ec.l.simple; + label_data& ld = ec.l.simple(); float final_prediction = 0, partial_prediction = 0; @@ -225,16 +225,16 @@ void predict_or_learn_adaptive(boosting& o, LEARNER::single_learner& base, examp base.predict(ec, i); float z; - z = ld.label * ec.pred.scalar; + z = ld.label * ec.pred.scalar(); s += z * o.alpha[i]; if (v_partial_sum <= stopping_point) { - final_prediction += ec.pred.scalar * o.alpha[i]; + final_prediction += ec.pred.scalar() * o.alpha[i]; } - 
partial_prediction += ec.pred.scalar * o.alpha[i]; + partial_prediction += ec.pred.scalar() * o.alpha[i]; v_partial_sum += o.v[i]; @@ -259,7 +259,7 @@ void predict_or_learn_adaptive(boosting& o, LEARNER::single_learner& base, examp base.predict(ec, i); if (v_partial_sum <= stopping_point) { - final_prediction += ec.pred.scalar * o.alpha[i]; + final_prediction += ec.pred.scalar() * o.alpha[i]; } else { @@ -282,9 +282,9 @@ void predict_or_learn_adaptive(boosting& o, LEARNER::single_learner& base, examp ec.weight = u; ec.partial_prediction = final_prediction; - ec.pred.scalar = sign(final_prediction); + ec.pred.scalar() = sign(final_prediction); - if (ld.label == ec.pred.scalar) + if (ld.label == ec.pred.scalar()) ec.loss = 0.; else ec.loss = ec.weight; @@ -448,6 +448,6 @@ LEARNER::base_learner* boosting_setup(options_i& options, vw& all) THROW("Unrecognized boosting algorithm: \'" << data->alg << "\' Bailing!"); l->set_finish_example(return_example); - + l->label_type = label_type_t::simple; return make_base(*l); } diff --git a/vowpalwabbit/bs.cc b/vowpalwabbit/bs.cc index 51eb0d12807..977e5f098fd 100644 --- a/vowpalwabbit/bs.cc +++ b/vowpalwabbit/bs.cc @@ -24,18 +24,16 @@ struct bs size_t bs_type; float lb; float ub; - std::vector* pred_vec; + std::vector pred_vec; vw* all; // for raw prediction and loss std::shared_ptr _random_state; - - ~bs() { delete pred_vec; } }; void bs_predict_mean(vw& all, example& ec, std::vector& pred_vec) { - ec.pred.scalar = (float)accumulate(pred_vec.cbegin(), pred_vec.cend(), 0.0) / pred_vec.size(); - if (ec.weight > 0 && ec.l.simple.label != FLT_MAX) - ec.loss = all.loss->getLoss(all.sd, ec.pred.scalar, ec.l.simple.label) * ec.weight; + ec.pred.scalar() = (float)accumulate(pred_vec.cbegin(), pred_vec.cend(), 0.0) / pred_vec.size(); + if (ec.weight > 0 && ec.l.simple().label != FLT_MAX) + ec.loss = all.loss->getLoss(all.sd, ec.pred.scalar(), ec.l.simple().label) * ec.weight; } void bs_predict_vote(example& ec, std::vector& pred_vec) @@ -124,11 +122,11 @@ void bs_predict_vote(example& ec, std::vector& pred_vec) delete[] pred_vec_int; // ld.prediction = sum_labels/(float)counter; //replace line below for: "avg on votes" and getLoss() - ec.pred.scalar = (float)current_label; + ec.pred.scalar() = (float)current_label; // ec.loss = all.loss->getLoss(all.sd, ld.prediction, ld.label) * ec.weight; //replace line below for: "avg on votes" // and getLoss() - ec.loss = ((ec.pred.scalar == ec.l.simple.label) ? 0.f : 1.f) * ec.weight; + ec.loss = ((ec.pred.scalar() == ec.l.simple().label) ? 
0.f : 1.f) * ec.weight; } void print_result(int f, float res, v_array tag, float lb, float ub) @@ -148,7 +146,7 @@ void print_result(int f, float res, v_array tag, float lb, float ub) void output_example(vw& all, bs& d, example& ec) { - label_data& ld = ec.l.simple; + label_data& ld = ec.l.simple(); all.sd->update(ec.test_only, ld.label != FLT_MAX, ec.loss, ec.weight, ec.num_features); if (ld.label != FLT_MAX && !ec.test_only) @@ -158,7 +156,7 @@ void output_example(vw& all, bs& d, example& ec) { d.lb = FLT_MAX; d.ub = -FLT_MAX; - for (double v : *d.pred_vec) + for (double v : d.pred_vec) { if (v > d.ub) d.ub = (float)v; @@ -167,7 +165,8 @@ void output_example(vw& all, bs& d, example& ec) } } - for (int sink : all.final_prediction_sink) print_result(sink, ec.pred.scalar, ec.tag, d.lb, d.ub); + for (int sink : all.final_prediction_sink) + print_result(sink, ec.pred.scalar(), ec.tag, d.lb, d.ub); print_update(all, ec); } @@ -181,7 +180,7 @@ void predict_or_learn(bs& d, single_learner& base, example& ec) float weight_temp = ec.weight; std::stringstream outputStringStream; - d.pred_vec->clear(); + d.pred_vec.clear(); for (size_t i = 1; i <= d.B; i++) { @@ -192,7 +191,7 @@ void predict_or_learn(bs& d, single_learner& base, example& ec) else base.predict(ec, i - 1); - d.pred_vec->push_back(ec.pred.scalar); + d.pred_vec.push_back(ec.pred.scalar()); if (shouldOutput) { @@ -207,10 +206,10 @@ void predict_or_learn(bs& d, single_learner& base, example& ec) switch (d.bs_type) { case BS_TYPE_MEAN: - bs_predict_mean(all, ec, *d.pred_vec); + bs_predict_mean(all, ec, d.pred_vec); break; case BS_TYPE_VOTE: - bs_predict_vote(ec, *d.pred_vec); + bs_predict_vote(ec, d.pred_vec); break; default: THROW("Unknown bs_type specified: " << d.bs_type); @@ -256,14 +255,13 @@ base_learner* bs_setup(options_i& options, vw& all) else // by default use mean data->bs_type = BS_TYPE_MEAN; - data->pred_vec = new std::vector(); - data->pred_vec->reserve(data->B); + data->pred_vec.reserve(data->B); data->all = &all; data->_random_state = all.get_random_state(); learner& l = init_learner( data, as_singleline(setup_base(options, all)), predict_or_learn, predict_or_learn, data->B); l.set_finish_example(finish_example); - + l.label_type = label_type_t::simple; return make_base(l); } diff --git a/vowpalwabbit/cache.cc b/vowpalwabbit/cache.cc index 011b462b8bd..289e91d4422 100644 --- a/vowpalwabbit/cache.cc +++ b/vowpalwabbit/cache.cc @@ -67,7 +67,8 @@ int read_cached_features(vw* all, v_array& examples) ae->sorted = all->p->sorted_cache; io_buf* input = all->p->input; - size_t total = all->p->lp.read_cached_label(all->p->_shared_data, &ae->l, *input); + all->p->lp.default_label(ae->l); + size_t total = all->p->lp.read_cached_label(all->p->_shared_data, ae->l, *input); if (total == 0) return 0; if (read_cached_tag(*input, ae) == 0) @@ -188,7 +189,7 @@ void output_features(io_buf& cache, unsigned char index, features& fs, uint64_t *(size_t*)storage_size_loc = c - storage_size_loc - sizeof(size_t); } -void cache_tag(io_buf& cache, v_array tag) +void cache_tag(io_buf& cache, const v_array& tag) { char* c; cache.buf_write(c, sizeof(size_t) + tag.size()); diff --git a/vowpalwabbit/cache.h b/vowpalwabbit/cache.h index 696b9b42976..d05a4245956 100644 --- a/vowpalwabbit/cache.h +++ b/vowpalwabbit/cache.h @@ -11,7 +11,7 @@ char* run_len_decode(char* p, size_t& i); char* run_len_encode(char* p, size_t i); int read_cached_features(vw* all, v_array& examples); -void cache_tag(io_buf& cache, v_array tag); +void cache_tag(io_buf& cache, const 
v_array& tag); void cache_features(io_buf& cache, example* ae, uint64_t mask); void output_byte(io_buf& cache, unsigned char s); void output_features(io_buf& cache, unsigned char index, features& fs, uint64_t mask); diff --git a/vowpalwabbit/cb.cc b/vowpalwabbit/cb.cc index c26581badf7..17d8b1b58fe 100644 --- a/vowpalwabbit/cb.cc +++ b/vowpalwabbit/cb.cc @@ -13,12 +13,12 @@ using namespace LEARNER; namespace CB { -char* bufread_label(CB::label* ld, char* c, io_buf& cache) +char* bufread_label(CB::label& ld, char* c, io_buf& cache) { size_t num = *(size_t*)c; - ld->costs.clear(); + ld.costs.clear(); c += sizeof(size_t); - size_t total = sizeof(cb_class) * num + sizeof(ld->weight); + size_t total = sizeof(cb_class) * num + sizeof(ld.weight); if (cache.buf_read(c, total) < total) { std::cout << "error in demarshal of cost data" << std::endl; @@ -28,17 +28,16 @@ char* bufread_label(CB::label* ld, char* c, io_buf& cache) { cb_class temp = *(cb_class*)c; c += sizeof(cb_class); - ld->costs.push_back(temp); + ld.costs.push_back(temp); } - memcpy(&ld->weight, c, sizeof(ld->weight)); - c += sizeof(ld->weight); + memcpy(&ld.weight, c, sizeof(ld.weight)); + c += sizeof(ld.weight); return c; } -size_t read_cached_label(shared_data*, void* v, io_buf& cache) +size_t read_cached_label(shared_data*, CB::label& ld, io_buf& cache) { - CB::label* ld = (CB::label*)v; - ld->costs.clear(); + ld.costs.clear(); char* c; size_t total = sizeof(size_t); if (cache.buf_read(c, total) < total) @@ -48,71 +47,70 @@ size_t read_cached_label(shared_data*, void* v, io_buf& cache) return total; } -float weight(void* v) +size_t read_cached_label(shared_data* s, polylabel& v, io_buf& cache) { - CB::label* ld = (CB::label*)v; - return ld->weight; + return CB::read_cached_label(s, v.cb(), cache); } -char* bufcache_label(CB::label* ld, char* c) +float weight(CB::label& ld) { return ld.weight; } + +float weight(polylabel& v) { return CB::weight(v.cb()); } + +char* bufcache_label(CB::label& ld, char* c) { - *(size_t*)c = ld->costs.size(); + *(size_t*)c = ld.costs.size(); c += sizeof(size_t); - for (auto const& cost : ld->costs) + for (auto const& cost : ld.costs) { *(cb_class*)c = cost; c += sizeof(cb_class); } - memcpy(c, &ld->weight, sizeof(ld->weight)); - c += sizeof(ld->weight); + memcpy(c, &ld.weight, sizeof(ld.weight)); + c += sizeof(ld.weight); return c; } -void cache_label(void* v, io_buf& cache) +void cache_label(CB::label& ld, io_buf& cache) { char* c; - CB::label* ld = (CB::label*)v; - cache.buf_write(c, sizeof(size_t) + sizeof(cb_class) * ld->costs.size() + sizeof(ld->weight)); + cache.buf_write(c, sizeof(size_t) + sizeof(cb_class) * ld.costs.size() + sizeof(ld.weight)); bufcache_label(ld, c); } -void default_label(void* v) +void cache_label(polylabel& v, io_buf& cache) { CB::cache_label(v.cb(), cache); } + +void default_label(CB::label& ld) +{ + ld.costs.clear(); + ld.weight = 1; +} + +void default_label(polylabel& v) { - CB::label* ld = (CB::label*)v; - ld->costs.clear(); - ld->weight = 1; + if (v.get_type() != label_type_t::cb) + { + v.reset(); + v.init_as_cb(); + } + CB::default_label(v.cb()); } -bool test_label(void* v) +bool test_label(CB::label& ld) { - CB::label* ld = (CB::label*)v; - if (ld->costs.empty()) + if (ld.costs.empty()) return true; - for (auto const& cost : ld->costs) + for (auto const& cost : ld.costs) if (FLT_MAX != cost.cost && cost.probability > 0.) 
return false; return true; } -void delete_label(void* v) -{ - CB::label* ld = (CB::label*)v; - ld->costs.delete_v(); -} +bool test_label(polylabel& v) { return CB::test_label(v.cb()); } -void copy_label(void* dst, void* src) +void parse_label(parser* p, shared_data*, CB::label& ld, v_array& words) { - CB::label* ldD = (CB::label*)dst; - CB::label* ldS = (CB::label*)src; - copy_array(ldD->costs, ldS->costs); - ldD->weight = ldS->weight; -} - -void parse_label(parser* p, shared_data*, void* v, v_array& words) -{ - CB::label* ld = (CB::label*)v; - ld->costs.clear(); - ld->weight = 1.0; + ld.costs.clear(); + ld.weight = 1.0; for (auto const& word : words) { @@ -159,20 +157,29 @@ void parse_label(parser* p, shared_data*, void* v, v_array& wor std::cerr << "shared feature vectors should not have costs" << std::endl; } - ld->costs.push_back(f); + ld.costs.push_back(f); } } -label_parser cb_label = {default_label, parse_label, cache_label, read_cached_label, delete_label, weight, copy_label, - test_label, sizeof(label)}; +void parse_label(parser* p, shared_data* sd, polylabel& v, v_array& words) +{ + CB::parse_label(p, sd, v.cb(), words); +} + +label_parser cb_label = {default_label, parse_label, cache_label, read_cached_label,polylabel_delete_label, + weight, polylabel_copy_label, test_label, sizeof(label)}; bool ec_is_example_header(example const& ec) // example headers just have "shared" { - const auto& costs = ec.l.cb.costs; - if (costs.size() != 1) - return false; - if (costs[0].probability == -1.f) - return true; + if (ec.l.get_type() == label_type_t::cb) + { + const auto& costs = ec.l.cb().costs; + if (costs.size() != 1) + return false; + if (costs[0].probability == -1.f) + return true; + } + return false; } @@ -182,7 +189,6 @@ void print_update(vw& all, bool is_test, example& ec, multi_ex* ec_seq, bool act { size_t num_features = ec.num_features; - size_t pred = ec.pred.multiclass; if (ec_seq != nullptr) { num_features = 0; @@ -200,94 +206,89 @@ void print_update(vw& all, bool is_test, example& ec, multi_ex* ec_seq, bool act if (action_scores) { std::ostringstream pred_buf; + const auto& a_s = ec.pred.action_probs(); pred_buf << std::setw(shared_data::col_current_predict) << std::right << std::setfill(' '); - if (!ec.pred.a_s.empty()) - pred_buf << ec.pred.a_s[0].action << ":" << ec.pred.a_s[0].score << "..."; + if (!a_s.empty()) + pred_buf << ec.pred.action_probs()[0].action << ":" << a_s[0].score << "..."; else pred_buf << "no action"; all.sd->print_update(all.holdout_set_off, all.current_pass, label_buf, pred_buf.str(), num_features, all.progress_add, all.progress_arg); } else + { + size_t pred = ec.pred.multiclass(); all.sd->print_update(all.holdout_set_off, all.current_pass, label_buf, (uint32_t)pred, num_features, all.progress_add, all.progress_arg); + } } } } // namespace CB namespace CB_EVAL { -float weight(void* v) +float weight(polylabel& v) { - CB_EVAL::label* ld = (CB_EVAL::label*)v; - return ld->event.weight; + auto& ld = v.cb_eval(); + return ld.event.weight; } -size_t read_cached_label(shared_data* sd, void* v, io_buf& cache) +size_t read_cached_label(shared_data* sd, polylabel& v, io_buf& cache) { - CB_EVAL::label* ld = (CB_EVAL::label*)v; + auto& ld = v.cb_eval(); char* c; size_t total = sizeof(uint32_t); if (cache.buf_read(c, total) < total) return 0; - ld->action = *(uint32_t*)c; + ld.action = *(uint32_t*)c; - return total + CB::read_cached_label(sd, &(ld->event), cache); + return total + CB::read_cached_label(sd, ld.event, cache); } -void cache_label(void* v, io_buf& 
cache) +void cache_label(polylabel& v, io_buf& cache) { char* c; - CB_EVAL::label* ld = (CB_EVAL::label*)v; + auto& ld = v.cb_eval(); cache.buf_write(c, sizeof(uint32_t)); - *(uint32_t*)c = ld->action; + *(uint32_t*)c = ld.action; - CB::cache_label(&(ld->event), cache); + CB::cache_label(ld.event, cache); } -void default_label(void* v) +void default_label(polylabel& v) { - CB_EVAL::label* ld = (CB_EVAL::label*)v; - CB::default_label(&(ld->event)); - ld->action = 0; -} - -bool test_label(void* v) -{ - CB_EVAL::label* ld = (CB_EVAL::label*)v; - return CB::test_label(&ld->event); -} + if (v.get_type() != label_type_t::cb_eval) + { + v.reset(); + v.init_as_cb_eval(); -void delete_label(void* v) -{ - CB_EVAL::label* ld = (CB_EVAL::label*)v; - CB::delete_label(&(ld->event)); + } + auto& ld = v.cb_eval(); + CB::default_label(ld.event); + ld.action = 0; } -void copy_label(void* dst, void* src) +bool test_label(polylabel& v) { - CB_EVAL::label* ldD = (CB_EVAL::label*)dst; - CB_EVAL::label* ldS = (CB_EVAL::label*)src; - CB::copy_label(&(ldD->event), &(ldS)->event); - ldD->action = ldS->action; + auto& ld = v.cb_eval(); + return CB::test_label(ld.event); } -void parse_label(parser* p, shared_data* sd, void* v, v_array& words) +void parse_label(parser* p, shared_data* sd, polylabel& v, v_array& words) { - CB_EVAL::label* ld = (CB_EVAL::label*)v; + auto& ld = v.cb_eval(); if (words.size() < 2) THROW("Evaluation can not happen without an action and an exploration"); - ld->action = (uint32_t)hashstring(words[0].begin(), words[0].length(), 0); + ld.action = (uint32_t)hashstring(words[0].begin(), words[0].length(), 0); words.begin()++; - CB::parse_label(p, sd, &(ld->event), words); + CB::parse_label(p, sd, ld.event, words); words.begin()--; } -label_parser cb_eval = {default_label, parse_label, cache_label, read_cached_label, delete_label, weight, copy_label, - test_label, sizeof(CB_EVAL::label)}; -} // namespace CB_EVAL +label_parser cb_eval = {default_label, parse_label, cache_label, read_cached_label, polylabel_delete_label, weight, polylabel_copy_label, + test_label, sizeof(CB_EVAL::label)};} // namespace CB_EVAL diff --git a/vowpalwabbit/cb.h b/vowpalwabbit/cb.h index 27893dff99c..a37dec96a56 100644 --- a/vowpalwabbit/cb.h +++ b/vowpalwabbit/cb.h @@ -26,6 +26,9 @@ struct label float weight; }; +bool test_label(label& ld); +void default_label(label& ld); + extern label_parser cb_label; // for learning bool ec_is_example_header(example const& ec); // example headers look like "shared" diff --git a/vowpalwabbit/cb_adf.cc b/vowpalwabbit/cb_adf.cc index 053cdfad3d4..6481b6af53f 100644 --- a/vowpalwabbit/cb_adf.cc +++ b/vowpalwabbit/cb_adf.cc @@ -69,21 +69,6 @@ struct cb_adf const VW::version_struct* get_model_file_ver() const { return _model_file_ver; } - ~cb_adf() - { - _cb_labels.delete_v(); - for (auto& prepped_cs_label : _prepped_cs_labels) prepped_cs_label.costs.delete_v(); - _prepped_cs_labels.delete_v(); - _cs_labels.costs.delete_v(); - _backup_weights.delete_v(); - _backup_nf.delete_v(); - _prob_s.delete_v(); - - _a_s.delete_v(); - _a_s_mtr_cs.delete_v(); - _gen_cs.pred_scores.costs.delete_v(); - } - private: void learn_IPS(multi_learner& base, multi_ex& examples); void learn_DR(multi_learner& base, multi_ex& examples); @@ -103,9 +88,9 @@ CB::cb_class get_observed_cost(multi_ex& examples) size_t i = 0; for (example*& ec : examples) { - if (ec->l.cb.costs.size() == 1 && ec->l.cb.costs[0].cost != FLT_MAX && ec->l.cb.costs[0].probability > 0) + if (ec->l.cb().costs.size() == 1 && 
ec->l.cb().costs[0].cost != FLT_MAX && ec->l.cb().costs[0].probability > 0) { - ld = &ec->l.cb; + ld = &ec->l.cb(); index = (int)i; } ++i; @@ -146,10 +131,10 @@ void cb_adf::learn_SM(multi_learner& base, multi_ex& examples) _a_s.clear(); _prob_s.clear(); // TODO: Check that predicted scores are always stored with the first example - for (uint32_t i = 0; i < examples[0]->pred.a_s.size(); i++) + for (uint32_t i = 0; i < examples[0]->pred.action_probs().size(); i++) { - _a_s.push_back({examples[0]->pred.a_s[i].action, examples[0]->pred.a_s[i].score}); - _prob_s.push_back({examples[0]->pred.a_s[i].action, 0.0}); + _a_s.push_back({examples[0]->pred.action_probs()[i].action, examples[0]->pred.action_probs()[i].score}); + _prob_s.push_back({examples[0]->pred.action_probs()[i].action, 0.0}); } float sign_offset = 1.0; // To account for negative rewards/costs @@ -158,7 +143,7 @@ void cb_adf::learn_SM(multi_learner& base, multi_ex& examples) for (uint32_t i = 0; i < examples.size(); i++) { - CB::label ld = examples[i]->l.cb; + CB::label ld = examples[i]->l.cb(); if (ld.costs.size() == 1 && ld.costs[0].cost != FLT_MAX) { chosen_action = i; @@ -241,7 +226,7 @@ void cb_adf::learn_MTR(multi_learner& base, multi_ex& examples) { gen_cs_example_ips(examples, _cs_labels); call_cs_ldf(base, examples, _cb_labels, _cs_labels, _prepped_cs_labels, _offset); - std::swap(examples[0]->pred.a_s, _a_s); + std::swap(examples[0]->pred.action_probs(), _a_s); } // second train on _one_ action (which requires up to 3 examples). // We must go through the cost sensitive classifier layer to get @@ -249,16 +234,16 @@ void cb_adf::learn_MTR(multi_learner& base, multi_ex& examples) gen_cs_example_mtr(_gen_cs, examples, _cs_labels); uint32_t nf = (uint32_t)examples[_gen_cs.mtr_example]->num_features; float old_weight = examples[_gen_cs.mtr_example]->weight; - const float clipped_p = std::max(examples[_gen_cs.mtr_example]->l.cb.costs[0].probability, _clip_p); + const float clipped_p = std::max(examples[_gen_cs.mtr_example]->l.cb().costs[0].probability, _clip_p); examples[_gen_cs.mtr_example]->weight *= 1.f / clipped_p * ((float)_gen_cs.event_sum / (float)_gen_cs.action_sum); - std::swap(_gen_cs.mtr_ec_seq[0]->pred.a_s, _a_s_mtr_cs); + std::swap(_gen_cs.mtr_ec_seq[0]->pred.action_probs(), _a_s_mtr_cs); // TODO!!! cb_labels are not getting properly restored (empty costs are dropped) GEN_CS::call_cs_ldf(base, _gen_cs.mtr_ec_seq, _cb_labels, _cs_labels, _prepped_cs_labels, _offset); examples[_gen_cs.mtr_example]->num_features = nf; examples[_gen_cs.mtr_example]->weight = old_weight; - std::swap(_gen_cs.mtr_ec_seq[0]->pred.a_s, _a_s_mtr_cs); - std::swap(examples[0]->pred.a_s, _a_s); + std::swap(_gen_cs.mtr_ec_seq[0]->pred.action_probs(), _a_s_mtr_cs); + std::swap(examples[0]->pred.action_probs(), _a_s); } // Validates a multiline example collection as a valid sequence for action dependent features format. @@ -272,11 +257,11 @@ example* test_adf_sequence(multi_ex& ec_seq) for (auto* ec : ec_seq) { // Check if there is more than one cost for this example. - if (ec->l.cb.costs.size() > 1) + if (ec->l.cb().costs.size() > 1) THROW("cb_adf: badly formatted example, only one cost can be known."); // Check whether the cost was initialized to a value. 
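// (an observed cost is one stored with cost != FLT_MAX; at most one example in the sequence may carry it)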
- if (ec->l.cb.costs.size() == 1 && ec->l.cb.costs[0].cost != FLT_MAX) + if (ec->l.cb().costs.size() == 1 && ec->l.cb().costs[0].cost != FLT_MAX) { ret = ec; count += 1; @@ -298,8 +283,8 @@ void cb_adf::do_actual_learning(multi_learner& base, multi_ex& ec_seq) /* v_array temp_scores; temp_scores = v_init(); do_actual_learning(data,base); - for (size_t i = 0; i < data.ec_seq[0]->pred.a_s.size(); i++) - temp_scores.push_back(data.ec_seq[0]->pred.a_s[i].score);*/ + for (size_t i = 0; i < data.ec_seq[0]->pred.action_scores().size(); i++) + temp_scores.push_back(data.ec_seq[0]->pred.action_scores()[i].score);*/ switch (_gen_cs.cb_type) { case CB_TYPE_IPS: @@ -325,9 +310,9 @@ void cb_adf::do_actual_learning(multi_learner& base, multi_ex& ec_seq) } /* for (size_t i = 0; i < temp_scores.size(); i++) - if (temp_scores[i] != data.ec_seq[0]->pred.a_s[i].score) - std::cout << "problem! " << temp_scores[i] << " != " << data.ec_seq[0]->pred.a_s[i].score << " for " << - data.ec_seq[0]->pred.a_s[i].action << std::endl; temp_scores.delete_v();*/ + if (temp_scores[i] != data.ec_seq[0]->pred.action_scores()[i].score) + std::cout << "problem! " << temp_scores[i] << " != " << data.ec_seq[0]->pred.action_scores()[i].score << " for " << + data.ec_seq[0]->pred.action_scores()[i].action << std::endl; temp_scores.delete_v();*/ } else { @@ -355,7 +340,7 @@ bool cb_adf::update_statistics(example& ec, multi_ex* ec_seq) { size_t num_features = 0; - uint32_t action = ec.pred.a_s[0].action; + uint32_t action = ec.pred.action_probs()[0].action; for (const auto& example : *ec_seq) num_features += example->num_features; float loss = 0.; @@ -380,15 +365,14 @@ void output_example(vw& all, cb_adf& c, example& ec, multi_ex* ec_seq) bool labeled_example = c.update_statistics(ec, ec_seq); - uint32_t action = ec.pred.a_s[0].action; - for (int sink : all.final_prediction_sink) - all.print_by_ref(sink, (float)action, 0, ec.tag); + uint32_t action = ec.pred.action_probs()[0].action; + for (int sink : all.final_prediction_sink) all.print_by_ref(sink, (float)action, 0, ec.tag); if (all.raw_prediction > 0) { std::string outputString; std::stringstream outputStringStream(outputString); - const auto& costs = ec.l.cb.costs; + const auto& costs = ec.l.cb().costs; for (size_t i = 0; i < costs.size(); i++) { @@ -404,14 +388,14 @@ void output_example(vw& all, cb_adf& c, example& ec, multi_ex* ec_seq) void output_rank_example(vw& all, cb_adf& c, example& ec, multi_ex* ec_seq) { - const auto& costs = ec.l.cb.costs; + const auto& costs = ec.l.cb().costs; if (example_is_newline_not_header(ec)) return; bool labeled_example = c.update_statistics(ec, ec_seq); - for (int sink : all.final_prediction_sink) print_action_score(sink, ec.pred.a_s, ec.tag); + for (int sink : all.final_prediction_sink) print_action_score(sink, ec.pred.action_probs(), ec.tag); if (all.raw_prediction > 0) { @@ -540,8 +524,6 @@ base_learner* cb_adf_setup(options_i& options, vw& all) all.trace_message << "warning: clipping probability not yet implemented for cb_type sm; p will not be clipped." << std::endl; - all.delete_prediction = ACTION_SCORE::delete_action_scores; - // Push necessary flags. 
if ((!options.was_supplied("csoaa_ldf") && !options.was_supplied("wap_ldf")) || rank_all || !options.was_supplied("csoaa_rank")) @@ -570,11 +552,12 @@ base_learner* cb_adf_setup(options_i& options, vw& all) cb_adf* bare = ld.get(); learner& l = - init_learner(ld, base, learn, predict, problem_multiplier, prediction_type_t::action_scores); + init_learner(ld, base, learn, predict, problem_multiplier, prediction_type_t::action_probs); l.set_finish_example(CB_ADF::finish_multiline_example); bare->set_scorer(all.scorer); l.set_save_load(CB_ADF::save_load); + l.label_type = label_type_t::cb; return make_base(l); } diff --git a/vowpalwabbit/cb_algs.cc b/vowpalwabbit/cb_algs.cc index 2c2d9614ea7..447433acf5a 100644 --- a/vowpalwabbit/cb_algs.cc +++ b/vowpalwabbit/cb_algs.cc @@ -21,12 +21,6 @@ struct cb { cb_to_cs cbcs; COST_SENSITIVE::label cb_cs_ld; - - ~cb() - { - cb_cs_ld.costs.delete_v(); - COST_SENSITIVE::cs_label.delete_label(&cbcs.pred_scores); - } }; bool know_all_cost_example(CB::label& ld) @@ -47,7 +41,7 @@ bool know_all_cost_example(CB::label& ld) template void predict_or_learn(cb& data, single_learner& base, example& ec) { - CB::label ld = ec.l.cb; + CB::label ld = std::move(ec.l.cb()); cb_to_cs& c = data.cbcs; c.known_cost = get_observed_cost(ld); if (c.known_cost != nullptr && (c.known_cost->action < 1 || c.known_cost->action > c.num_actions)) @@ -58,8 +52,8 @@ void predict_or_learn(cb& data, single_learner& base, example& ec) if (c.cb_type != CB_TYPE_DM) { - ec.l.cs = data.cb_cs_ld; - + ec.l.reset(); + ec.l.init_as_cs(data.cb_cs_ld); if (is_learn) base.learn(ec); else @@ -67,15 +61,16 @@ void predict_or_learn(cb& data, single_learner& base, example& ec) for (size_t i = 0; i < ld.costs.size(); i++) ld.costs[i].partial_prediction = data.cb_cs_ld.costs[i].partial_prediction; - ec.l.cb = ld; } + ec.l.reset(); + ec.l.init_as_cb(std::move(ld)); } void predict_eval(cb&, single_learner&, example&) { THROW("can not use a test label for evaluation"); } void learn_eval(cb& data, single_learner&, example& ec) { - CB_EVAL::label ld = ec.l.cb_eval; + CB_EVAL::label& ld = ec.l.cb_eval(); cb_to_cs& c = data.cbcs; c.known_cost = get_observed_cost(ld.event); @@ -84,7 +79,7 @@ void learn_eval(cb& data, single_learner&, example& ec) for (size_t i = 0; i < ld.event.costs.size(); i++) ld.event.costs[i].partial_prediction = data.cb_cs_ld.costs[i].partial_prediction; - ec.pred.multiclass = ec.l.cb_eval.action; + ec.pred.multiclass() = ec.l.cb_eval().action; } void output_example(vw& all, cb& data, example& ec, CB::label& ld) @@ -92,14 +87,13 @@ void output_example(vw& all, cb& data, example& ec, CB::label& ld) float loss = 0.; cb_to_cs& c = data.cbcs; - if (!CB::cb_label.test_label(&ld)) - loss = get_cost_estimate(c.known_cost, c.pred_scores, ec.pred.multiclass); + if (!CB::test_label(ld)) + loss = get_cost_estimate(c.known_cost, c.pred_scores, ec.pred.multiclass()); - all.sd->update(ec.test_only, !CB::cb_label.test_label(&ld), loss, 1.f, ec.num_features); + all.sd->update(ec.test_only, !CB::test_label(ld), loss, 1.f, ec.num_features); for (int sink : all.final_prediction_sink) - all.print_by_ref(sink, (float)ec.pred.multiclass, 0, ec.tag); - + all.print_by_ref(sink, (float)ec.pred.multiclass(), 0, ec.tag); if (all.raw_prediction > 0) { std::stringstream outputStringStream; @@ -113,18 +107,18 @@ void output_example(vw& all, cb& data, example& ec, CB::label& ld) all.print_text_by_ref(all.raw_prediction, outputStringStream.str(), ec.tag); } - print_update(all, CB::cb_label.test_label(&ld), ec, nullptr, 
false); + print_update(all, CB::test_label(ld), ec, nullptr, false); } void finish_example(vw& all, cb& c, example& ec) { - output_example(all, c, ec, ec.l.cb); + output_example(all, c, ec, ec.l.cb()); VW::finish_example(all, ec); } void eval_finish_example(vw& all, cb& c, example& ec) { - output_example(all, c, ec, ec.l.cb_eval.event); + output_example(all, c, ec, ec.l.cb_eval().event); VW::finish_example(all, ec); } } // namespace CB_ALGS @@ -183,28 +177,23 @@ base_learner* cb_algs_setup(options_i& options, vw& all) } auto base = as_singleline(setup_base(options, all)); - if (eval) - { - all.p->lp = CB_EVAL::cb_eval; - all.label_type = label_type_t::cb_eval; - } - else - { - all.p->lp = CB::cb_label; - all.label_type = label_type_t::cb; - } learner* l; if (eval) { l = &init_learner(data, base, learn_eval, predict_eval, problem_multiplier, prediction_type_t::multiclass); l->set_finish_example(eval_finish_example); + all.p->lp = CB_EVAL::cb_eval; + l->label_type = label_type_t::cb_eval; + } else { l = &init_learner( data, base, predict_or_learn, predict_or_learn, problem_multiplier, prediction_type_t::multiclass); l->set_finish_example(finish_example); + all.p->lp = CB::cb_label; + l->label_type = label_type_t::cb; } c.scorer = all.scorer; diff --git a/vowpalwabbit/cb_algs.h b/vowpalwabbit/cb_algs.h index 3e9f1657521..653de954667 100644 --- a/vowpalwabbit/cb_algs.h +++ b/vowpalwabbit/cb_algs.h @@ -21,7 +21,7 @@ template float get_cost_pred( LEARNER::single_learner* scorer, CB::cb_class* known_cost, example& ec, uint32_t index, uint32_t base) { - CB::label ld = ec.l.cb; + auto label = std::move(ec.l); label_data simple_temp; simple_temp.initial = 0.; @@ -32,8 +32,12 @@ float get_cost_pred( const bool baseline_enabled_old = BASELINE::baseline_enabled(&ec); BASELINE::set_baseline_enabled(&ec); - ec.l.simple = simple_temp; - polyprediction p = ec.pred; + ec.l.reset(); + ec.l.init_as_simple(simple_temp); + // Save what is in the prediction right now, and restore it before we exit the function. 
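+ // Moving out of the polyprediction leaves it without a type, so it is reset and
+ // re-initialized as a scalar before the base learner writes ec.pred.scalar().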
+ polyprediction p = std::move(ec.pred); + ec.pred.reset(); + ec.pred.init_as_scalar(); if (is_learn && known_cost != nullptr && index == known_cost->action) { float old_weight = ec.weight; @@ -46,11 +50,10 @@ float get_cost_pred( if (!baseline_enabled_old) BASELINE::reset_baseline_disabled(&ec); - float pred = ec.pred.scalar; - ec.pred = p; - - ec.l.cb = ld; + float pred = ec.pred.scalar(); + ec.pred = std::move(p); + ec.l = std::move(label); return pred; } diff --git a/vowpalwabbit/cb_dro.cc b/vowpalwabbit/cb_dro.cc index d0d03778ba5..18a2f63892d 100644 --- a/vowpalwabbit/cb_dro.cc +++ b/vowpalwabbit/cb_dro.cc @@ -36,14 +36,14 @@ struct cb_dro_data if (is_learn) { - const auto it = std::find_if(examples.begin(), examples.end(), [](example *item) { return !item->l.cb.costs.empty(); }); + const auto it = std::find_if(examples.begin(), examples.end(), [](example *item) { return !item->l.cb().costs.empty(); }); if (it != examples.end()) { - const CB::cb_class logged = (*it)->l.cb.costs[0]; + const CB::cb_class logged = (*it)->l.cb().costs[0]; const uint32_t labelled_action = std::distance(examples.begin(), it); - const auto action_scores = examples[0]->pred.a_s; + const auto& action_scores = examples[0]->pred.action_probs(); // cb_explore_adf => want maximum probability // cb_adf => first action is a greedy action @@ -151,12 +151,19 @@ base_learner *cb_dro_setup(options_i &options, vw &all) THROW("invalid cb_dro parameter values supplied"); } + auto* base = as_multiline(setup_base(options, all)); if (options.was_supplied("cb_explore_adf")) { - return make_base(init_learner(data, as_multiline(setup_base(options, all)), learn_or_predict, learn_or_predict, 1 /* weights */, prediction_type_t::action_probs)); + auto& learner = init_learner(data, base, learn_or_predict, learn_or_predict, + 1 /* weights */, prediction_type_t::action_probs); + learner.label_type = label_type_t::cb; + return make_base(learner); } else { - return make_base(init_learner(data, as_multiline(setup_base(options, all)), learn_or_predict, learn_or_predict, 1 /* weights */, prediction_type_t::action_probs)); + auto& learner = init_learner(data, base, learn_or_predict, learn_or_predict, + 1 /* weights */, prediction_type_t::action_probs); + learner.label_type = label_type_t::cb; + return make_base(learner); } } diff --git a/vowpalwabbit/cb_explore.cc b/vowpalwabbit/cb_explore.cc index 0321ff54c56..c3c1b3c5d81 100644 --- a/vowpalwabbit/cb_explore.cc +++ b/vowpalwabbit/cb_explore.cc @@ -31,7 +31,7 @@ struct cb_explore COST_SENSITIVE::label cs_label; COST_SENSITIVE::label second_cs_label; - learner* cs; + learner* cost_sensitive_learner; size_t tau; float epsilon; @@ -40,24 +40,18 @@ struct cb_explore float psi; size_t counter; - - ~cb_explore() - { - preds.delete_v(); - cover_probs.delete_v(); - COST_SENSITIVE::cs_label.delete_label(&cbcs.pred_scores); - COST_SENSITIVE::cs_label.delete_label(&cs_label); - COST_SENSITIVE::cs_label.delete_label(&second_cs_label); - } }; template void predict_or_learn_first(cb_explore& data, single_learner& base, example& ec) { // Explore tau times, then act according to optimal. 
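+ // The prediction buffer is moved out for reuse and ec.pred is re-initialized as
+ // multiclass, which is what the base cb learner produces; it is switched back to
+ // action_probs once the probabilities are filled in below.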
- action_scores probs = ec.pred.a_s; + auto probs = std::move(ec.pred.action_probs()); + probs.clear(); + ec.pred.reset(); + ec.pred.init_as_multiclass(); - if (is_learn && ec.l.cb.costs[0].probability < 1) + if (is_learn && ec.l.cb().costs[0].probability < 1) base.learn(ec); else base.predict(ec); @@ -71,22 +65,23 @@ void predict_or_learn_first(cb_explore& data, single_learner& base, example& ec) } else { - uint32_t chosen = ec.pred.multiclass - 1; + uint32_t chosen = ec.pred.multiclass() - 1; for (uint32_t i = 0; i < data.cbcs.num_actions; i++) probs.push_back({i, 0.}); probs[chosen].score = 1.0; } - ec.pred.a_s = probs; + ec.pred.reset(); + ec.pred.init_as_action_probs(std::move(probs)); } template void predict_or_learn_greedy(cb_explore& data, single_learner& base, example& ec) { // Explore uniform random an epsilon fraction of the time. - // TODO: pointers are copied here. What happens if base.learn/base.predict re-allocs? - // ec.pred.a_s = probs; will restore the than free'd memory - action_scores probs = ec.pred.a_s; + auto probs = std::move(ec.pred.action_probs()); probs.clear(); + ec.pred.reset(); + ec.pred.init_as_multiclass(); if (is_learn) base.learn(ec); @@ -95,18 +90,22 @@ void predict_or_learn_greedy(cb_explore& data, single_learner& base, example& ec // pre-allocate pdf probs.resize(data.cbcs.num_actions); - for (uint32_t i = 0; i < data.cbcs.num_actions; i++) probs.push_back({i, 0}); - generate_epsilon_greedy(data.epsilon, ec.pred.multiclass - 1, begin_scores(probs), end_scores(probs)); + for (uint32_t i = 0; i < data.cbcs.num_actions; i++) + probs.push_back({i, 0}); + generate_epsilon_greedy(data.epsilon, ec.pred.multiclass() - 1, begin_scores(probs), end_scores(probs)); - ec.pred.a_s = probs; + ec.pred.reset(); + ec.pred.init_as_action_probs(std::move(probs)); } template void predict_or_learn_bag(cb_explore& data, single_learner& base, example& ec) { // Randomize over predictions from a base set of predictors - action_scores probs = ec.pred.a_s; + auto probs = std::move(ec.pred.action_probs()); probs.clear(); + ec.pred.reset(); + ec.pred.init_as_multiclass(); for (uint32_t i = 0; i < data.cbcs.num_actions; i++) probs.push_back({i, 0.}); float prob = 1.f / (float)data.bag_size; @@ -117,13 +116,14 @@ void predict_or_learn_bag(cb_explore& data, single_learner& base, example& ec) base.learn(ec, i); else base.predict(ec, i); - uint32_t chosen = ec.pred.multiclass - 1; + uint32_t chosen = ec.pred.multiclass() - 1; probs[chosen].score += prob; if (is_learn) for (uint32_t j = 1; j < count; j++) base.learn(ec, i); } - ec.pred.a_s = probs; + ec.pred.reset(); + ec.pred.init_as_action_probs(std::move(probs)); } void get_cover_probabilities(cb_explore& data, single_learner& /* base */, example& ec, v_array& probs) @@ -131,16 +131,19 @@ void get_cover_probabilities(cb_explore& data, single_learner& /* base */, examp float additive_probability = 1.f / (float)data.cover_size; data.preds.clear(); + ec.pred.reset(); + ec.pred.init_as_multiclass(); + for (uint32_t i = 0; i < data.cbcs.num_actions; i++) probs.push_back({i, 0.}); for (size_t i = 0; i < data.cover_size; i++) { // get predicted cost-sensitive predictions if (i == 0) - data.cs->predict(ec, i); + data.cost_sensitive_learner->predict(ec, i); else - data.cs->predict(ec, i + 1); - uint32_t pred = ec.pred.multiclass; + data.cost_sensitive_learner->predict(ec, i + 1); + uint32_t pred = ec.pred.multiclass(); probs[pred - 1].score += additive_probability; data.preds.push_back((uint32_t)pred); } @@ -161,7 +164,7 @@ void 
predict_or_learn_cover(cb_explore& data, single_learner& base, example& ec) uint32_t num_actions = data.cbcs.num_actions; - action_scores probs = ec.pred.a_s; + auto probs = std::move(ec.pred.action_probs()); probs.clear(); data.cs_label.costs.clear(); @@ -176,14 +179,16 @@ void predict_or_learn_cover(cb_explore& data, single_learner& base, example& ec) float min_prob = std::min(1.f / num_actions, 1.f / (float)std::sqrt(counter * num_actions)); - data.cb_label = ec.l.cb; + data.cb_label = std::move(ec.l.cb()); - ec.l.cs = data.cs_label; + ec.l.reset(); + ec.l.init_as_cs() = std::move(data.cs_label); get_cover_probabilities(data, base, ec, probs); if (is_learn) { - ec.l.cb = data.cb_label; + ec.l.reset(); + ec.l.init_as_cb() = std::move(data.cb_label); base.learn(ec); // Now update oracles @@ -191,12 +196,17 @@ void predict_or_learn_cover(cb_explore& data, single_learner& base, example& ec) // 1. Compute loss vector data.cs_label.costs.clear(); float norm = min_prob * num_actions; - ec.l.cb = data.cb_label; - data.cbcs.known_cost = get_observed_cost(data.cb_label); - gen_cs_example(data.cbcs, ec, data.cb_label, data.cs_label); + // This should not be needed as it was done just above. + // ec.l.cb() = data.cb_label; + data.cbcs.known_cost = get_observed_cost(ec.l.cb()); + gen_cs_example(data.cbcs, ec, ec.l.cb(), data.cs_label); for (uint32_t i = 0; i < num_actions; i++) probabilities[i] = 0; - ec.l.cs = data.second_cs_label; + data.cb_label = std::move(ec.l.cb()); + ec.l.reset(); + ec.l.init_as_cs(std::move(data.second_cs_label)); + auto& second_cs_label_ref = ec.l.cs(); + // 2. Update functions for (size_t i = 0; i < cover_size; i++) { @@ -205,21 +215,24 @@ void predict_or_learn_cover(cb_explore& data, single_learner& base, example& ec) { float pseudo_cost = data.cs_label.costs[j].x - data.psi * min_prob / (std::max(probabilities[j], min_prob) / norm) + 1; - data.second_cs_label.costs[j].class_index = j + 1; - data.second_cs_label.costs[j].x = pseudo_cost; + second_cs_label_ref.costs[j].class_index = j + 1; + second_cs_label_ref.costs[j].x = pseudo_cost; } if (i != 0) - data.cs->learn(ec, i + 1); + data.cost_sensitive_learner->learn(ec, i + 1); if (probabilities[predictions[i] - 1] < min_prob) norm += std::max(0.f, additive_probability - (min_prob - probabilities[predictions[i] - 1])); else norm += additive_probability; probabilities[predictions[i] - 1] += additive_probability; } + data.second_cs_label = std::move(ec.l.cs()); } - ec.l.cb = data.cb_label; - ec.pred.a_s = probs; + ec.l.reset(); + ec.l.init_as_cb(std::move(data.cb_label)); + ec.pred.reset(); + ec.pred.init_as_action_probs(std::move(probs)); } void print_update_cb_explore(vw& all, bool is_test, example& ec, std::stringstream& pred_string) @@ -230,7 +243,7 @@ void print_update_cb_explore(vw& all, bool is_test, example& ec, std::stringstre if (is_test) label_string << " unknown"; else - label_string << ec.l.cb.costs[0].action; + label_string << ec.l.cb().costs[0].action; all.sd->print_update(all.holdout_set_off, all.current_pass, label_string.str(), pred_string.str(), ec.num_features, all.progress_add, all.progress_arg); } @@ -243,20 +256,20 @@ void output_example(vw& all, cb_explore& data, example& ec, CB::label& ld) cb_to_cs& c = data.cbcs; if ((c.known_cost = get_observed_cost(ld)) != nullptr) - for (uint32_t i = 0; i < ec.pred.a_s.size(); i++) - loss += get_cost_estimate(c.known_cost, c.pred_scores, i + 1) * ec.pred.a_s[i].score; + for (uint32_t i = 0; i < ec.pred.action_probs().size(); i++) + loss += 
get_cost_estimate(c.known_cost, c.pred_scores, i + 1) * ec.pred.action_probs()[i].score; all.sd->update(ec.test_only, get_observed_cost(ld) != nullptr, loss, 1.f, ec.num_features); std::stringstream ss; float maxprob = 0.; uint32_t maxid = 0; - for (uint32_t i = 0; i < ec.pred.a_s.size(); i++) + for (uint32_t i = 0; i < ec.pred.action_probs().size(); i++) { - ss << std::fixed << ec.pred.a_s[i].score << " "; - if (ec.pred.a_s[i].score > maxprob) + ss << std::fixed << ec.pred.action_probs()[i].score << " "; + if (ec.pred.action_probs()[i].score > maxprob) { - maxprob = ec.pred.a_s[i].score; + maxprob = ec.pred.action_probs()[i].score; maxid = i + 1; } } @@ -264,12 +277,12 @@ void output_example(vw& all, cb_explore& data, example& ec, CB::label& ld) std::stringstream sso; sso << maxid << ":" << std::fixed << maxprob; - print_update_cb_explore(all, CB::cb_label.test_label(&ld), ec, sso); + print_update_cb_explore(all, CB::test_label(ld), ec, sso); } void finish_example(vw& all, cb_explore& c, example& ec) { - output_example(all, c, ec, ec.l.cb); + output_example(all, c, ec, ec.l.cb()); VW::finish_example(all, ec); } } // namespace CB_EXPLORE @@ -303,7 +316,6 @@ base_learner* cb_explore_setup(options_i& options, vw& all) options.insert("cb", ss.str()); } - all.delete_prediction = delete_action_scores; data->cbcs.cb_type = CB_TYPE_DR; single_learner* base = as_singleline(setup_base(options, all)); @@ -312,12 +324,10 @@ base_learner* cb_explore_setup(options_i& options, vw& all) learner* l; if (options.was_supplied("cover")) { - data->cs = (learner*)(as_singleline(all.cost_sensitive)); + data->cost_sensitive_learner = reinterpret_cast*>(as_singleline(all.cost_sensitive)); data->second_cs_label.costs.resize(num_actions); data->second_cs_label.costs.end() = data->second_cs_label.costs.begin() + num_actions; - data->cover_probs = v_init(); data->cover_probs.resize(num_actions); - data->preds = v_init(); data->preds.resize(data->cover_size); l = &init_learner(data, base, predict_or_learn_cover, predict_or_learn_cover, data->cover_size + 1, prediction_type_t::action_probs); @@ -333,5 +343,6 @@ base_learner* cb_explore_setup(options_i& options, vw& all) data, base, predict_or_learn_greedy, predict_or_learn_greedy, 1, prediction_type_t::action_probs); l->set_finish_example(finish_example); + l->label_type = label_type_t::cb; return make_base(*l); } diff --git a/vowpalwabbit/cb_explore_adf_bag.cc b/vowpalwabbit/cb_explore_adf_bag.cc index eaa389e5d7d..3084f71e14f 100644 --- a/vowpalwabbit/cb_explore_adf_bag.cc +++ b/vowpalwabbit/cb_explore_adf_bag.cc @@ -41,7 +41,7 @@ struct cb_explore_adf_bag public: cb_explore_adf_bag( float epsilon, size_t bag_size, bool greedify, bool first_only, std::shared_ptr random_state); - ~cb_explore_adf_bag(); + ~cb_explore_adf_bag() = default; // Should be called through cb_explore_adf_base for pre/post-processing void predict(LEARNER::multi_learner& base, multi_ex& examples) { predict_or_learn_impl(base, examples); } @@ -62,7 +62,7 @@ template void cb_explore_adf_bag::predict_or_learn_impl(LEARNER::multi_learner& base, multi_ex& examples) { // Randomize over predictions from a base set of predictors - v_array& preds = examples[0]->pred.a_s; + auto& preds = examples[0]->pred.action_probs(); uint32_t num_actions = (uint32_t)examples.size(); if (num_actions == 0) { @@ -83,7 +83,7 @@ void cb_explore_adf_bag::predict_or_learn_impl(LEARNER::multi_learner& base, mul LEARNER::multiline_learn_or_predict(base, examples, examples[0]->ft_offset, i); else 
LEARNER::multiline_learn_or_predict(base, examples, examples[0]->ft_offset, i); - + auto& preds = examples[0]->pred.action_probs(); assert(preds.size() == num_actions); for (auto e : preds) _scores[e.action] += e.score; @@ -113,8 +113,6 @@ void cb_explore_adf_bag::predict_or_learn_impl(LEARNER::multi_learner& base, mul for (size_t i = 0; i < num_actions; i++) preds[i] = _action_probs[i]; } -cb_explore_adf_bag::~cb_explore_adf_bag() { _action_probs.delete_v(); } - LEARNER::base_learner* setup(VW::config::options_i& options, vw& all) { using config::make_option; @@ -143,19 +141,16 @@ LEARNER::base_learner* setup(VW::config::options_i& options, vw& all) options.insert("cb_adf", ""); } - all.delete_prediction = ACTION_SCORE::delete_action_scores; - size_t problem_multiplier = bag_size; LEARNER::multi_learner* base = as_multiline(setup_base(options, all)); all.p->lp = CB::cb_label; - all.label_type = label_type_t::cb; using explore_type = cb_explore_adf_base; auto data = scoped_calloc_or_throw(epsilon, bag_size, greedify, first_only, all.get_random_state()); LEARNER::learner& l = LEARNER::init_learner( data, base, explore_type::learn, explore_type::predict, problem_multiplier, prediction_type_t::action_probs); - + l.label_type = label_type_t::cb; l.set_finish_example(explore_type::finish_multiline_example); return make_base(l); } diff --git a/vowpalwabbit/cb_explore_adf_common.h b/vowpalwabbit/cb_explore_adf_common.h index ea6b0efb421..1307e2c2630 100644 --- a/vowpalwabbit/cb_explore_adf_common.h +++ b/vowpalwabbit/cb_explore_adf_common.h @@ -93,8 +93,8 @@ inline void cb_explore_adf_base::predict( if (label_example != nullptr) { // predict path, replace the label example with an empty one - data._action_label = label_example->l.cb; - label_example->l.cb = data._empty_label; + data._action_label = std::move(label_example->l.cb()); + label_example->l.cb() = std::move(data._empty_label); } data.explore.predict(base, examples); @@ -102,7 +102,8 @@ inline void cb_explore_adf_base::predict( if (label_example != nullptr) { // predict path, restore label - label_example->l.cb = data._action_label; + data._empty_label = std::move(label_example->l.cb()); + label_example->l.cb() = std::move(data._action_label); } } @@ -134,7 +135,7 @@ void cb_explore_adf_base::output_example(vw& all, multi_ex& ec_seq) float loss = 0.; auto& ec = *ec_seq[0]; - const auto& preds = ec.pred.a_s; + const auto& preds = ec.pred.action_probs(); for (const auto& example : ec_seq) { @@ -158,13 +159,13 @@ void cb_explore_adf_base::output_example(vw& all, multi_ex& ec_seq) all.sd->update(holdout_example, labeled_example, loss, ec.weight, num_features); - for (auto sink : all.final_prediction_sink) ACTION_SCORE::print_action_score(sink, ec.pred.a_s, ec.tag); + for (auto sink : all.final_prediction_sink) ACTION_SCORE::print_action_score(sink, ec.pred.action_probs(), ec.tag); if (all.raw_prediction > 0) { std::string outputString; std::stringstream outputStringStream(outputString); - const auto& costs = ec.l.cb.costs; + const auto& costs = ec.l.cb().costs; for (size_t i = 0; i < costs.size(); i++) { diff --git a/vowpalwabbit/cb_explore_adf_cover.cc b/vowpalwabbit/cb_explore_adf_cover.cc index b811daa7a01..e7ef22e0fb6 100644 --- a/vowpalwabbit/cb_explore_adf_cover.cc +++ b/vowpalwabbit/cb_explore_adf_cover.cc @@ -44,7 +44,7 @@ struct cb_explore_adf_cover public: cb_explore_adf_cover(size_t cover_size, float psi, bool nounif, bool first_only, LEARNER::multi_learner* cs_ldf_learner, LEARNER::single_learner* scorer, size_t cb_type); - 
~cb_explore_adf_cover(); + ~cb_explore_adf_cover() = default; // Should be called through cb_explore_adf_base for pre/post-processing void predict(LEARNER::multi_learner& base, multi_ex& examples) { predict_or_learn_impl(base, examples); } @@ -72,6 +72,7 @@ void cb_explore_adf_cover::predict_or_learn_impl(LEARNER::multi_learner& base, m // Randomize over predictions from a base set of predictors // Use cost sensitive oracle to cover actions to form distribution. const bool is_mtr = _gen_cs.cb_type == CB_TYPE_MTR; + // swap_to_scores(examples); if (is_learn) { if (is_mtr) // use DR estimates for non-ERM policies in MTR @@ -85,7 +86,8 @@ void cb_explore_adf_cover::predict_or_learn_impl(LEARNER::multi_learner& base, m GEN_CS::gen_cs_example_ips(examples, _cs_labels); LEARNER::multiline_learn_or_predict(base, examples, examples[0]->ft_offset); } - v_array& preds = examples[0]->pred.a_s; + // swap_to_probs(examples); + auto& preds = examples[0]->pred.action_probs(); const uint32_t num_actions = (uint32_t)preds.size(); float additive_probability = 1.f / (float)_cover_size; @@ -164,17 +166,6 @@ void cb_explore_adf_cover::predict_or_learn_impl(LEARNER::multi_learner& base, m ++_counter; } -cb_explore_adf_cover::~cb_explore_adf_cover() -{ - _cb_labels.delete_v(); - for (size_t i = 0; i < _prepped_cs_labels.size(); i++) _prepped_cs_labels[i].costs.delete_v(); - _prepped_cs_labels.delete_v(); - _cs_labels_2.costs.delete_v(); - _cs_labels.costs.delete_v(); - _action_probs.delete_v(); - _gen_cs.pred_scores.costs.delete_v(); -} - LEARNER::base_learner* setup(config::options_i& options, vw& all) { using config::make_option; @@ -216,8 +207,6 @@ LEARNER::base_learner* setup(config::options_i& options, vw& all) options.insert("cb_adf", ""); } - all.delete_prediction = ACTION_SCORE::delete_action_scores; - // Set cb_type size_t cb_type_enum; if (type_string.compare("dr") == 0) @@ -242,7 +231,6 @@ LEARNER::base_learner* setup(config::options_i& options, vw& all) LEARNER::multi_learner* base = LEARNER::as_multiline(setup_base(options, all)); all.p->lp = CB::cb_label; - all.label_type = label_type_t::cb; using explore_type = cb_explore_adf_base; auto data = scoped_calloc_or_throw( @@ -250,6 +238,7 @@ LEARNER::base_learner* setup(config::options_i& options, vw& all) LEARNER::learner& l = init_learner( data, base, explore_type::learn, explore_type::predict, problem_multiplier, prediction_type_t::action_probs); + l.label_type = label_type_t::cb; l.set_finish_example(explore_type::finish_multiline_example); return make_base(l); diff --git a/vowpalwabbit/cb_explore_adf_first.cc b/vowpalwabbit/cb_explore_adf_first.cc index 2aa4c142264..4f4ff620afd 100644 --- a/vowpalwabbit/cb_explore_adf_first.cc +++ b/vowpalwabbit/cb_explore_adf_first.cc @@ -51,8 +51,7 @@ void cb_explore_adf_first::predict_or_learn_impl(LEARNER::multi_learner& base, m LEARNER::multiline_learn_or_predict(base, examples, examples[0]->ft_offset); else LEARNER::multiline_learn_or_predict(base, examples, examples[0]->ft_offset); - - v_array& preds = examples[0]->pred.a_s; + auto& preds = examples[0]->pred.action_probs(); uint32_t num_actions = (uint32_t)preds.size(); if (_tau) @@ -95,22 +94,19 @@ LEARNER::base_learner* setup(config::options_i& options, vw& all) options.insert("cb_adf", ""); } - all.delete_prediction = ACTION_SCORE::delete_action_scores; - size_t problem_multiplier = 1; LEARNER::multi_learner* base = LEARNER::as_multiline(setup_base(options, all)); all.p->lp = CB::cb_label; - all.label_type = label_type_t::cb; using explore_type = 
cb_explore_adf_base; auto data = scoped_calloc_or_throw(tau, epsilon); LEARNER::learner& l = LEARNER::init_learner( data, base, explore_type::learn, explore_type::predict, problem_multiplier, prediction_type_t::action_probs); - + l.label_type = label_type_t::cb; l.set_finish_example(explore_type::finish_multiline_example); - return make_base(l); + return make_base(l); } } // namespace first } // namespace cb_explore_adf diff --git a/vowpalwabbit/cb_explore_adf_greedy.cc b/vowpalwabbit/cb_explore_adf_greedy.cc index ae8ea0c5f68..ee910fe9543 100644 --- a/vowpalwabbit/cb_explore_adf_greedy.cc +++ b/vowpalwabbit/cb_explore_adf_greedy.cc @@ -50,8 +50,7 @@ void cb_explore_adf_greedy::predict_or_learn_impl(LEARNER::multi_learner& base, { // Explore uniform random an epsilon fraction of the time. LEARNER::multiline_learn_or_predict(base, examples, examples[0]->ft_offset); - - ACTION_SCORE::action_scores& preds = examples[0]->pred.a_s; + auto& preds = examples[0]->pred.action_probs(); uint32_t num_actions = (uint32_t)preds.size(); @@ -97,8 +96,6 @@ LEARNER::base_learner* setup(VW::config::options_i& options, vw& all) options.insert("cb_adf", ""); } - all.delete_prediction = ACTION_SCORE::delete_action_scores; - size_t problem_multiplier = 1; if (!options.was_supplied("epsilon")) @@ -106,13 +103,13 @@ LEARNER::base_learner* setup(VW::config::options_i& options, vw& all) LEARNER::multi_learner* base = as_multiline(setup_base(options, all)); all.p->lp = CB::cb_label; - all.label_type = label_type_t::cb; using explore_type = cb_explore_adf_base; auto data = scoped_calloc_or_throw(epsilon, first_only); LEARNER::learner& l = LEARNER::init_learner( data, base, explore_type::learn, explore_type::predict, problem_multiplier, prediction_type_t::action_probs); + l.label_type = label_type_t::cb; l.set_finish_example(explore_type::finish_multiline_example); return make_base(l); diff --git a/vowpalwabbit/cb_explore_adf_regcb.cc b/vowpalwabbit/cb_explore_adf_regcb.cc index 1c282736872..2e33dc52705 100644 --- a/vowpalwabbit/cb_explore_adf_regcb.cc +++ b/vowpalwabbit/cb_explore_adf_regcb.cc @@ -95,7 +95,7 @@ float cb_explore_adf_regcb::binary_search(float fhat, float delta, float sens, f void cb_explore_adf_regcb::get_cost_ranges(float delta, LEARNER::multi_learner& base, multi_ex& examples, bool min_only) { - const size_t num_actions = examples[0]->pred.a_s.size(); + const size_t num_actions = examples[0]->pred.action_probs().size(); _min_costs.resize(num_actions); _max_costs.resize(num_actions); @@ -105,14 +105,15 @@ void cb_explore_adf_regcb::get_cost_ranges(float delta, LEARNER::multi_learner& // backup cb example data for (const auto& ex : examples) { - _ex_as.push_back(ex->pred.a_s); - _ex_costs.push_back(ex->l.cb.costs); + _ex_as.push_back(std::move(ex->pred.action_probs())); + _ex_costs.push_back(std::move(ex->l.cb().costs)); } // set regressor predictions for (const auto& as : _ex_as[0]) { - examples[as.action]->pred.scalar = as.score; + examples[as.action]->pred.reset(); + examples[as.action]->pred.init_as_scalar() = as.score; } const float cmin = _min_cb_cost; @@ -121,32 +122,33 @@ void cb_explore_adf_regcb::get_cost_ranges(float delta, LEARNER::multi_learner& for (size_t a = 0; a < num_actions; ++a) { example* ec = examples[a]; - ec->l.simple.label = cmin - 1; + ec->l.reset(); + ec->l.init_as_simple().label = cmin - 1; float sens = base.sensitivity(*ec); float w = 0; // importance weight - if (ec->pred.scalar < cmin || std::isnan(sens) || std::isinf(sens)) + if (ec->pred.scalar() < cmin || 
std::isnan(sens) || std::isinf(sens)) _min_costs[a] = cmin; else { - w = binary_search(ec->pred.scalar - cmin + 1, delta, sens); - _min_costs[a] = (std::max)(ec->pred.scalar - sens * w, cmin); + w = binary_search(ec->pred.scalar() - cmin + 1, delta, sens); + _min_costs[a] = (std::max)(ec->pred.scalar() - sens * w, cmin); if (_min_costs[a] > cmax) _min_costs[a] = cmax; } if (!min_only) { - ec->l.simple.label = cmax + 1; + ec->l.simple().label = cmax + 1; sens = base.sensitivity(*ec); - if (ec->pred.scalar > cmax || std::isnan(sens) || std::isinf(sens)) + if (ec->pred.scalar() > cmax || std::isnan(sens) || std::isinf(sens)) { _max_costs[a] = cmax; } else { - w = binary_search(cmax + 1 - ec->pred.scalar, delta, sens); - _max_costs[a] = (std::min)(ec->pred.scalar + sens * w, cmax); + w = binary_search(cmax + 1 - ec->pred.scalar(), delta, sens); + _max_costs[a] = (std::min)(ec->pred.scalar() + sens * w, cmax); if (_max_costs[a] < cmin) _max_costs[a] = cmin; } @@ -156,8 +158,11 @@ void cb_explore_adf_regcb::get_cost_ranges(float delta, LEARNER::multi_learner& // reset cb example data for (size_t i = 0; i < examples.size(); ++i) { - examples[i]->pred.a_s = _ex_as[i]; - examples[i]->l.cb.costs = _ex_costs[i]; + examples[i]->pred.reset(); + examples[i]->pred.init_as_action_probs() = std::move(_ex_as[i]); + examples[i]->l.reset(); + examples[i]->l.init_as_cb(); + examples[i]->l.cb().costs = std::move(_ex_costs[i]); } } @@ -168,7 +173,7 @@ void cb_explore_adf_regcb::predict_or_learn_impl(LEARNER::multi_learner& base, m { for (size_t i = 0; i < examples.size() - 1; ++i) { - CB::label& ld = examples[i]->l.cb; + CB::label& ld = examples[i]->l.cb(); if (ld.costs.size() == 1) ld.costs[0].probability = 1.f; // no importance weighting } @@ -178,8 +183,7 @@ void cb_explore_adf_regcb::predict_or_learn_impl(LEARNER::multi_learner& base, m } else LEARNER::multiline_learn_or_predict(base, examples, examples[0]->ft_offset); - - v_array& preds = examples[0]->pred.a_s; + auto& preds = examples[0]->pred.action_probs(); uint32_t num_actions = (uint32_t)preds.size(); const float max_range = _max_cb_cost - _min_cb_cost; @@ -272,19 +276,17 @@ LEARNER::base_learner* setup(VW::config::options_i& options, vw& all) options.replace("cb_type", mtr); } - all.delete_prediction = ACTION_SCORE::delete_action_scores; - // Set explore_type size_t problem_multiplier = 1; LEARNER::multi_learner* base = as_multiline(setup_base(options, all)); all.p->lp = CB::cb_label; - all.label_type = label_type_t::cb; using explore_type = cb_explore_adf_base; auto data = scoped_calloc_or_throw(regcbopt, c0, first_only, min_cb_cost, max_cb_cost); LEARNER::learner& l = LEARNER::init_learner( data, base, explore_type::learn, explore_type::predict, problem_multiplier, prediction_type_t::action_probs); + l.label_type = label_type_t::cb; l.set_finish_example(explore_type::finish_multiline_example); return make_base(l); diff --git a/vowpalwabbit/cb_explore_adf_softmax.cc b/vowpalwabbit/cb_explore_adf_softmax.cc index d2b465fcac2..980e1eda01d 100644 --- a/vowpalwabbit/cb_explore_adf_softmax.cc +++ b/vowpalwabbit/cb_explore_adf_softmax.cc @@ -45,8 +45,7 @@ template void cb_explore_adf_softmax::predict_or_learn_impl(LEARNER::multi_learner& base, multi_ex& examples) { LEARNER::multiline_learn_or_predict(base, examples, examples[0]->ft_offset); - - v_array& preds = examples[0]->pred.a_s; + auto& preds = examples[0]->pred.action_probs(); exploration::generate_softmax( -_lambda, begin_scores(preds), end_scores(preds), begin_scores(preds), end_scores(preds)); @@ 
-82,19 +81,17 @@ LEARNER::base_learner* setup(VW::config::options_i& options, vw& all) options.insert("cb_adf", ""); } - all.delete_prediction = ACTION_SCORE::delete_action_scores; - // Set explore_type size_t problem_multiplier = 1; LEARNER::multi_learner* base = as_multiline(setup_base(options, all)); all.p->lp = CB::cb_label; - all.label_type = label_type_t::cb; using explore_type = cb_explore_adf_base; auto data = scoped_calloc_or_throw(epsilon, lambda); LEARNER::learner& l = LEARNER::init_learner( data, base, explore_type::learn, explore_type::predict, problem_multiplier, prediction_type_t::action_probs); + l.label_type = label_type_t::cb; l.set_finish_example(explore_type::finish_multiline_example); return make_base(l); diff --git a/vowpalwabbit/cb_sample.cc b/vowpalwabbit/cb_sample.cc index d78b04a1753..d258d44b8ae 100644 --- a/vowpalwabbit/cb_sample.cc +++ b/vowpalwabbit/cb_sample.cc @@ -26,12 +26,12 @@ struct cb_sample_data { multiline_learn_or_predict(base, examples, examples[0]->ft_offset); - auto action_scores = examples[0]->pred.a_s; + auto& action_scores = examples[0]->pred.action_probs(); uint32_t chosen_action = -1; int labelled_action = -1; // Find that chosen action in the learning case, skip the shared example. - auto it = std::find_if(examples.begin(), examples.end(), [](example *item) { return !item->l.cb.costs.empty(); }); + auto it = std::find_if(examples.begin(), examples.end(), [](example *item) { return !item->l.cb().costs.empty(); }); if (it != examples.end()) { labelled_action = std::distance(examples.begin(), it); @@ -116,6 +116,9 @@ base_learner *cb_sample_setup(options_i &options, vw &all) } auto data = scoped_calloc_or_throw(all.get_random_state()); - return make_base(init_learner(data, as_multiline(setup_base(options, all)), learn_or_predict, + auto base = as_multiline(setup_base(options, all)); + auto l = make_base(init_learner(data, base, learn_or_predict, learn_or_predict, 1 /* weights */, prediction_type_t::action_probs)); + l->label_type = label_type_t::cb; + return l; } diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index c2cb93594ba..440945b04d9 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -23,6 +23,15 @@ struct cbify_adf_data { multi_ex ecs; size_t num_actions; + + ~cbify_adf_data() + { + for (auto& ex : ecs) + { + ex->~example(); + free(ex); + } + } }; struct cbify @@ -39,26 +48,9 @@ struct cbify float loss1; // for ldf inputs - std::vector> cs_costs; - std::vector> cb_costs; + std::vector cs_labels; + std::vector cb_labels; std::vector cb_as; - - ~cbify() - { - CB::cb_label.delete_label(&cb_label); - a_s.delete_v(); - - if (use_adf) - { - for (size_t a = 0; a < adf_data.num_actions; ++a) - { - adf_data.ecs[a]->pred.a_s.delete_v(); - VW::dealloc_example(CB::cb_label.delete_label, *adf_data.ecs[a]); - free_it(adf_data.ecs[a]); - } - for (auto& as : cb_as) as.delete_v(); - } - } }; float loss(cbify& data, uint32_t label, uint32_t final_prediction) @@ -83,14 +75,14 @@ float loss_cs(cbify& data, v_array& costs, uint32_t fina return data.loss0 + (data.loss1 - data.loss0) * cost; } -float loss_csldf(cbify& data, std::vector>& cs_costs, uint32_t final_prediction) +float loss_csldf(cbify& data, std::vector& cs_labels, uint32_t final_prediction) { float cost = 0.; - for (auto costs : cs_costs) + for (auto& label : cs_labels) { - if (costs[0].class_index == final_prediction) + if (label.costs[0].class_index == final_prediction) { - cost = costs[0].x; + cost = label.costs[0].x; break; } } @@ -107,8 +99,8 @@ void 
copy_example_to_adf(cbify& data, example& ec) { auto& eca = *adf_data.ecs[a]; // clear label - auto& lab = eca.l.cb; - CB::cb_label.default_label(&lab); + auto& lab = eca.l.cb(); + CB::default_label(lab); // copy data VW::copy_example_data(false, &eca, &ec); @@ -123,7 +115,7 @@ void copy_example_to_adf(cbify& data, example& ec) } // avoid empty example by adding a tag (hacky) - if (CB_ALGS::example_is_newline_not_header(eca) && CB::cb_label.test_label(&eca.l)) + if (CB_ALGS::example_is_newline_not_header(eca) && CB::cb_label.test_label(eca.l)) { eca.tag.push_back('n'); } @@ -137,26 +129,28 @@ void predict_or_learn(cbify& data, single_learner& base, example& ec) MULTICLASS::label_t ld; COST_SENSITIVE::label csl; if (use_cs) - csl = ec.l.cs; + csl = std::move(ec.l.cs()); else - ld = ec.l.multi; + ld = std::move(ec.l.multi()); data.cb_label.costs.clear(); - ec.l.cb = data.cb_label; - ec.pred.a_s = data.a_s; + ec.l.reset(); + ec.l.init_as_cb(data.cb_label); + ec.pred.reset(); + ec.pred.init_as_action_probs(std::move(data.a_s)); // Call the cb_explore algorithm. It returns a vector of probabilities for each action base.predict(ec); - // data.probs = ec.pred.scalars; + // data.probs = ec.pred.scalars(); uint32_t chosen_action; - if (sample_after_normalizing( - data.app_seed + data.example_counter++, begin_scores(ec.pred.a_s), end_scores(ec.pred.a_s), chosen_action)) + if (sample_after_normalizing(data.app_seed + data.example_counter++, begin_scores(ec.pred.action_probs()), + end_scores(ec.pred.action_probs()), chosen_action)) THROW("Failed to sample from pdf"); CB::cb_class cl; cl.action = chosen_action + 1; - cl.probability = ec.pred.a_s[chosen_action].score; + cl.probability = ec.pred.action_probs()[chosen_action].score; if (!cl.action) THROW("No action with non-zero probability found!"); @@ -167,64 +161,59 @@ void predict_or_learn(cbify& data, single_learner& base, example& ec) // Create a new cb label data.cb_label.costs.push_back(cl); - ec.l.cb = data.cb_label; + ec.l.cb() = data.cb_label; if (is_learn) base.learn(ec); data.a_s.clear(); - data.a_s = ec.pred.a_s; + data.a_s = std::move(ec.pred.action_probs()); + ec.l.reset(); if (use_cs) - ec.l.cs = csl; + ec.l.init_as_cs(std::move(csl)); else - ec.l.multi = ld; + ec.l.init_as_multi(std::move(ld)); - ec.pred.multiclass = cl.action; + ec.pred.reset(); + ec.pred.init_as_multiclass() = cl.action; } +// This will call into cb_explore_adf, so it must use cb labels template void predict_or_learn_adf(cbify& data, multi_learner& base, example& ec) { - // Store the multiclass or cost-sensitive input label - MULTICLASS::label_t ld; - COST_SENSITIVE::label csl; - if (use_cs) - csl = ec.l.cs; - else - ld = ec.l.multi; - copy_example_to_adf(data, ec); base.predict(data.adf_data.ecs); auto& out_ec = *data.adf_data.ecs[0]; uint32_t chosen_action; - if (sample_after_normalizing(data.app_seed + data.example_counter++, begin_scores(out_ec.pred.a_s), - end_scores(out_ec.pred.a_s), chosen_action)) + if (sample_after_normalizing(data.app_seed + data.example_counter++, begin_scores(out_ec.pred.action_probs()), + end_scores(out_ec.pred.action_probs()), chosen_action)) THROW("Failed to sample from pdf"); CB::cb_class cl; - cl.action = out_ec.pred.a_s[chosen_action].action + 1; - cl.probability = out_ec.pred.a_s[chosen_action].score; + cl.action = out_ec.pred.action_probs()[chosen_action].action + 1; + cl.probability = out_ec.pred.action_probs()[chosen_action].score; if (!cl.action) THROW("No action with non-zero probability found!"); if (use_cs) - cl.cost = loss_cs(data, 
csl.costs, cl.action); + cl.cost = loss_cs(data, ec.l.cs().costs, cl.action); else - cl.cost = loss(data, ld.label, cl.action); + cl.cost = loss(data, ec.l.multi().label, cl.action); // add cb label to chosen action - auto& lab = data.adf_data.ecs[cl.action - 1]->l.cb; + auto& lab = data.adf_data.ecs[cl.action - 1]->l.cb(); lab.costs.clear(); lab.costs.push_back(cl); if (is_learn) base.learn(data.adf_data.ecs); - ec.pred.multiclass = cl.action; + ec.pred.multiclass() = cl.action; } void init_adf_data(cbify& data, const size_t num_actions) @@ -235,9 +224,10 @@ void init_adf_data(cbify& data, const size_t num_actions) adf_data.ecs.resize(num_actions); for (size_t a = 0; a < num_actions; ++a) { - adf_data.ecs[a] = VW::alloc_examples(CB::cb_label.label_size, 1); - auto& lab = adf_data.ecs[a]->l.cb; - CB::cb_label.default_label(&lab); + adf_data.ecs[a] = VW::alloc_examples(1); + auto& lab = adf_data.ecs[a]->l.init_as_cb(); + CB::default_label(lab); + adf_data.ecs[a]->pred.init_as_action_probs(); adf_data.ecs[a]->interactions = &data.all->interactions; } } @@ -246,68 +236,69 @@ template void do_actual_learning_ldf(cbify& data, multi_learner& base, multi_ex& ec_seq) { // change label and pred data for cb - if (data.cs_costs.size() < ec_seq.size()) - data.cs_costs.resize(ec_seq.size()); - if (data.cb_costs.size() < ec_seq.size()) - data.cb_costs.resize(ec_seq.size()); + if (data.cs_labels.size() < ec_seq.size()) + data.cs_labels.resize(ec_seq.size()); + if (data.cb_labels.size() < ec_seq.size()) + data.cb_labels.resize(ec_seq.size()); if (data.cb_as.size() < ec_seq.size()) data.cb_as.resize(ec_seq.size()); + for (size_t i = 0; i < ec_seq.size(); ++i) { auto& ec = *ec_seq[i]; - data.cs_costs[i] = ec.l.cs.costs; - data.cb_costs[i].clear(); - data.cb_as[i].clear(); - ec.l.cb.costs = data.cb_costs[i]; - ec.pred.a_s = data.cb_as[i]; + data.cs_labels[i] = std::move(ec.l.cs()); + + ec.l.reset(); + ec.l.init_as_cb(std::move(data.cb_labels[i])); + ec.pred.reset(); + ec.pred.init_as_action_probs(std::move(data.cb_as[i])); } base.predict(ec_seq); auto& out_ec = *ec_seq[0]; - uint32_t chosen_action; - if (sample_after_normalizing(data.app_seed + data.example_counter++, begin_scores(out_ec.pred.a_s), - end_scores(out_ec.pred.a_s), chosen_action)) + uint32_t chosen_action_index; + if (sample_after_normalizing(data.app_seed + data.example_counter++, begin_scores(out_ec.pred.action_probs()), + end_scores(out_ec.pred.action_probs()), chosen_action_index)) THROW("Failed to sample from pdf"); + const auto chosen_action_zero_based = out_ec.pred.action_probs()[chosen_action_index].action; + const auto chosen_action_score = out_ec.pred.action_probs()[chosen_action_index].score; + const auto chosen_action_one_based = chosen_action_zero_based + 1; + CB::cb_class cl; - cl.action = out_ec.pred.a_s[chosen_action].action + 1; - cl.probability = out_ec.pred.a_s[chosen_action].score; + cl.action = chosen_action_one_based; + cl.probability = chosen_action_score; if (!cl.action) THROW("No action with non-zero probability found!"); - cl.cost = loss_csldf(data, data.cs_costs, cl.action); - - // add cb label to chosen action - data.cb_label.costs.clear(); - data.cb_label.costs.push_back(cl); - data.cb_costs[cl.action - 1] = ec_seq[cl.action - 1]->l.cb.costs; - ec_seq[cl.action - 1]->l.cb = data.cb_label; + cl.cost = loss_csldf(data, data.cs_labels, chosen_action_one_based); + ec_seq[chosen_action_zero_based]->l.cb().costs.push_back(cl); base.learn(ec_seq); + ec_seq[chosen_action_zero_based]->l.cb().costs.clear(); - // set cs 
prediction and reset cs costs + // Return labels and predictions to be reused and restore initial labels and preds for (size_t i = 0; i < ec_seq.size(); ++i) { auto& ec = *ec_seq[i]; - data.cb_as[i] = ec.pred.a_s; // store action_score vector for later reuse. - if (i == cl.action - 1) - data.cb_label = ec.l.cb; - else - data.cb_costs[i] = ec.l.cb.costs; - ec.l.cs.costs = data.cs_costs[i]; - if (i == cl.action - 1) - ec.pred.multiclass = cl.action; - else - ec.pred.multiclass = 0; + // Store the cb label back in data to be reused. + data.cb_labels[i] = std::move(ec.l.cb()); + ec.l.reset(); + ec.l.init_as_cs(std::move(data.cs_labels[i])); + + // store action_score vector for later reuse, then set the output prediction. + data.cb_as[i] = std::move(ec.pred.action_probs()); + ec.pred.reset(); + ec.pred.init_as_multiclass() = (i == cl.action - 1) ? cl.action : 0; } } void output_example(vw& all, example& ec, bool& hit_loss, multi_ex* ec_seq) { - const auto& costs = ec.l.cs.costs; + const auto& costs = ec.l.cs().costs; if (example_is_newline(ec)) return; @@ -318,9 +309,9 @@ void output_example(vw& all, example& ec, bool& hit_loss, multi_ex* ec_seq) float loss = 0.; - uint32_t predicted_class = ec.pred.multiclass; + uint32_t predicted_class = ec.pred.multiclass(); - if (!COST_SENSITIVE::cs_label.test_label(&ec.l)) + if (!COST_SENSITIVE::cs_label.test_label(ec.l)) { for (auto const& cost : costs) { @@ -338,7 +329,7 @@ void output_example(vw& all, example& ec, bool& hit_loss, multi_ex* ec_seq) } for (int sink : all.final_prediction_sink) - all.print_by_ref(sink, (float)ec.pred.multiclass, 0, ec.tag); + all.print_by_ref(sink, (float)ec.pred.multiclass(), 0, ec.tag); if (all.raw_prediction > 0) { @@ -354,7 +345,7 @@ void output_example(vw& all, example& ec, bool& hit_loss, multi_ex* ec_seq) all.print_text_by_ref(all.raw_prediction, outputStringStream.str(), ec.tag); } - COST_SENSITIVE::print_update(all, COST_SENSITIVE::cs_label.test_label(&ec.l), ec, ec_seq, false, predicted_class); + COST_SENSITIVE::print_update(all, COST_SENSITIVE::cs_label.test_label(ec.l), ec, ec_seq, false, predicted_class); } void output_example_seq(vw& all, multi_ex& ec_seq) @@ -369,7 +360,7 @@ void output_example_seq(vw& all, multi_ex& ec_seq) if (all.raw_prediction > 0) { - v_array empty = {nullptr, nullptr, nullptr, 0}; + v_array empty; all.print_text_by_ref(all.raw_prediction, "", empty); } } @@ -405,7 +396,6 @@ base_learner* cbify_setup(options_i& options, vw& all) data->use_adf = options.was_supplied("cb_explore_adf"); data->app_seed = uniform_hash("vw", 2, 0); - data->a_s = v_init(); data->all = &all; if (data->use_adf) @@ -452,7 +442,8 @@ base_learner* cbify_setup(options_i& options, vw& all) else l = &init_multiclass_learner(data, base, predict_or_learn, predict_or_learn, all.p, 1); } - all.delete_prediction = nullptr; + + l->label_type = use_cs ? 
label_type_t::cs : label_type_t::multi; return make_base(*l); } @@ -496,7 +487,6 @@ base_learner* cbifyldf_setup(options_i& options, vw& all) l.set_finish_example(finish_multiline_example); all.p->lp = COST_SENSITIVE::cs_label; - all.delete_prediction = nullptr; - + l.label_type = label_type_t::cs; return make_base(l); } diff --git a/vowpalwabbit/ccb_label.cc b/vowpalwabbit/ccb_label.cc index cd7b0176859..d352fb5da63 100644 --- a/vowpalwabbit/ccb_label.cc +++ b/vowpalwabbit/ccb_label.cc @@ -26,28 +26,28 @@ using namespace VW::config; namespace CCB { -void default_label(void* v); +void default_label(polylabel& v); -size_t read_cached_label(shared_data*, void* v, io_buf& cache) +size_t read_cached_label(shared_data*, polylabel& v, io_buf& cache) { // Since read_cached_features doesn't default the label we must do it here. default_label(v); - CCB::label* ld = static_cast(v); + CCB::label& ld = v.ccb(); - if (ld->outcome) + if (ld.outcome) { - ld->outcome->probabilities.clear(); + ld.outcome->probabilities.clear(); } - ld->explicit_included_actions.clear(); + ld.explicit_included_actions.clear(); size_t read_count = 0; char* read_ptr; - size_t next_read_size = sizeof(ld->type); + size_t next_read_size = sizeof(ld.type); if (cache.buf_read(read_ptr, next_read_size) < next_read_size) return 0; - ld->type = *(CCB::example_type*)read_ptr; - read_count += sizeof(ld->type); + ld.type = *(CCB::example_type*)read_ptr; + read_count += sizeof(ld.type); bool is_outcome_present; next_read_size = sizeof(bool); @@ -58,14 +58,13 @@ size_t read_cached_label(shared_data*, void* v, io_buf& cache) if (is_outcome_present) { - ld->outcome = new CCB::conditional_contextual_bandit_outcome(); - ld->outcome->probabilities = v_init(); + ld.outcome = new CCB::conditional_contextual_bandit_outcome(); - next_read_size = sizeof(ld->outcome->cost); + next_read_size = sizeof(ld.outcome->cost); if (cache.buf_read(read_ptr, next_read_size) < next_read_size) return 0; - ld->outcome->cost = *(float*)read_ptr; - read_count += sizeof(ld->outcome->cost); + ld.outcome->cost = *(float*)read_ptr; + read_count += sizeof(ld.outcome->cost); uint32_t size_probs; next_read_size = sizeof(size_probs); @@ -83,7 +82,7 @@ size_t read_cached_label(shared_data*, void* v, io_buf& cache) a_s = *(ACTION_SCORE::action_score*)read_ptr; read_count += sizeof(a_s); - ld->outcome->probabilities.push_back(a_s); + ld.outcome->probabilities.push_back(a_s); } } @@ -102,123 +101,97 @@ size_t read_cached_label(shared_data*, void* v, io_buf& cache) return 0; include = *(uint32_t*)read_ptr; read_count += sizeof(include); - ld->explicit_included_actions.push_back(include); + ld.explicit_included_actions.push_back(include); } - next_read_size = sizeof(ld->weight); + next_read_size = sizeof(ld.weight); if (cache.buf_read(read_ptr, next_read_size) < next_read_size) return 0; - ld->weight = *(float*)read_ptr; + ld.weight = *(float*)read_ptr; return read_count; } -float ccb_weight(void* v) +float ccb_weight(polylabel& v) { - CCB::label* ld = (CCB::label*)v; - return ld->weight; + CCB::label& ld = (CCB::label&)v; + return ld.weight; } -void cache_label(void* v, io_buf& cache) +void cache_label(polylabel& v, io_buf& cache) { char* c; - CCB::label* ld = static_cast(v); + CCB::label& ld = v.ccb(); size_t size = sizeof(uint8_t) // type + sizeof(bool) // outcome exists? - + (ld->outcome == nullptr ? 
0 - : sizeof(ld->outcome->cost) // cost - + sizeof(uint32_t) // probabilities size - + sizeof(ACTION_SCORE::action_score) * ld->outcome->probabilities.size()) // probabilities + + (ld.outcome == nullptr ? 0 + : sizeof(ld.outcome->cost) // cost + + sizeof(uint32_t) // probabilities size + + sizeof(ACTION_SCORE::action_score) * ld.outcome->probabilities.size()) // probabilities + sizeof(uint32_t) // explicit_included_actions size - + sizeof(uint32_t) * ld->explicit_included_actions.size() + sizeof(ld->weight); + + sizeof(uint32_t) * ld.explicit_included_actions.size() + sizeof(ld.weight); cache.buf_write(c, size); - *(uint8_t*)c = static_cast(ld->type); - c += sizeof(ld->type); + *(uint8_t*)c = static_cast(ld.type); + c += sizeof(ld.type); - *(bool*)c = ld->outcome != nullptr; + *(bool*)c = ld.outcome != nullptr; c += sizeof(bool); - if (ld->outcome != nullptr) + if (ld.outcome != nullptr) { - *(float*)c = ld->outcome->cost; - c += sizeof(ld->outcome->cost); + *(float*)c = ld.outcome->cost; + c += sizeof(ld.outcome->cost); - *(uint32_t*)c = convert(ld->outcome->probabilities.size()); + *(uint32_t*)c = convert(ld.outcome->probabilities.size()); c += sizeof(uint32_t); - for (const auto& score : ld->outcome->probabilities) + for (const auto& score : ld.outcome->probabilities) { *(ACTION_SCORE::action_score*)c = score; c += sizeof(ACTION_SCORE::action_score); } } - *(uint32_t*)c = convert(ld->explicit_included_actions.size()); + *(uint32_t*)c = convert(ld.explicit_included_actions.size()); c += sizeof(uint32_t); - for (const auto& included_action : ld->explicit_included_actions) + for (const auto& included_action : ld.explicit_included_actions) { *(uint32_t*)c = included_action; c += sizeof(included_action); } - *(float*)c = ld->weight; - c += sizeof(ld->weight); + *(float*)c = ld.weight; + c += sizeof(ld.weight); } -void default_label(void* v) +void default_label(polylabel& v) { - CCB::label* ld = static_cast(v); - - // This is tested against nullptr, so unfortunately as things are this must be deleted when not used. - if (ld->outcome) + if (v.get_type() != label_type_t::conditional_contextual_bandit) { - ld->outcome->probabilities.delete_v(); - delete ld->outcome; - ld->outcome = nullptr; - } - - ld->explicit_included_actions.clear(); - ld->type = example_type::unset; - ld->weight = 1.0; -} + v.reset(); + v.init_as_ccb(); -bool test_label(void* v) -{ - CCB::label* ld = static_cast(v); - return ld->outcome == nullptr; -} + } + CCB::label& ld = v.ccb(); -void delete_label(void* v) -{ - CCB::label* ld = static_cast(v); - if (ld->outcome) + // This is tested against nullptr, so unfortunately as things are this must be deleted when not used. 
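// ---------------------------------------------------------------------------
// [Editorial sketch, not part of this patch] The nullptr-sentinel contract
// described in the comment above ("outcome == nullptr" means no outcome, and
// the pointer must be deleted when unused) could equivalently be expressed
// with std::unique_ptr, which makes the delete automatic. The types below are
// simplified stand-ins, not the real VW structures:
#include <cstdint>
#include <memory>
#include <utility>
#include <vector>

struct outcome_sketch
{
  float cost = 0.f;
  std::vector<std::pair<uint32_t, float>> probabilities;  // (action, score)
};

struct ccb_label_sketch
{
  std::unique_ptr<outcome_sketch> outcome;  // empty means "no outcome" / test label
  float weight = 1.f;
};

inline bool is_test_label(const ccb_label_sketch& l) { return l.outcome == nullptr; }
// ---------------------------------------------------------------------------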
+ if (ld.outcome) { - ld->outcome->probabilities.delete_v(); - delete ld->outcome; - ld->outcome = nullptr; + delete ld.outcome; + ld.outcome = nullptr; } - ld->explicit_included_actions.delete_v(); + + ld.explicit_included_actions.clear(); + ld.type = example_type::unset; + ld.weight = 1.0; } -void copy_label(void* dst, void* src) +bool test_label(polylabel& v) { - CCB::label* ldDst = static_cast(dst); - CCB::label* ldSrc = static_cast(src); - - if (ldSrc->outcome) - { - ldDst->outcome = new CCB::conditional_contextual_bandit_outcome(); - ldDst->outcome->probabilities = v_init(); - - ldDst->outcome->cost = ldSrc->outcome->cost; - copy_array(ldDst->outcome->probabilities, ldSrc->outcome->probabilities); - } - - copy_array(ldDst->explicit_included_actions, ldSrc->explicit_included_actions); - ldDst->type = ldSrc->type; - ldDst->weight = ldSrc->weight; + CCB::label& ld = v.ccb(); + return ld.outcome == nullptr; } ACTION_SCORE::action_score convert_to_score(const VW::string_view& action_id_str, const VW::string_view& probability_str) @@ -247,16 +220,15 @@ CCB::conditional_contextual_bandit_outcome* parse_outcome(VW::string_view& outco { auto& ccb_outcome = *(new CCB::conditional_contextual_bandit_outcome()); - auto split_commas = v_init(); + v_array split_commas; tokenize(',', outcome, split_commas); - auto split_colons = v_init(); + v_array split_colons; tokenize(':', split_commas[0], split_colons); if (split_colons.size() != 3) THROW("Malformed ccb label"); - ccb_outcome.probabilities = v_init(); ccb_outcome.probabilities.push_back(convert_to_score(split_colons[0], split_colons[2])); ccb_outcome.cost = float_of_string(split_colons[1]); @@ -273,24 +245,21 @@ CCB::conditional_contextual_bandit_outcome* parse_outcome(VW::string_view& outco ccb_outcome.probabilities.push_back(convert_to_score(split_colons[0], split_colons[1])); } - split_colons.delete_v(); - split_commas.delete_v(); - return &ccb_outcome; } -void parse_explicit_inclusions(CCB::label* ld, v_array& split_inclusions) +void parse_explicit_inclusions(CCB::label& ld, v_array& split_inclusions) { for (const auto& inclusion : split_inclusions) { - ld->explicit_included_actions.push_back(int_of_string(inclusion)); + ld.explicit_included_actions.push_back(int_of_string(inclusion)); } } -void parse_label(parser* p, shared_data*, void* v, v_array& words) +void parse_label(parser* p, shared_data*, polylabel& v, v_array& words) { - CCB::label* ld = static_cast(v); - ld->weight = 1.0; + CCB::label& ld = v.ccb(); + ld.weight = 1.0; if (words.size() < 2) THROW("ccb labels may not be empty"); @@ -304,19 +273,19 @@ void parse_label(parser* p, shared_data*, void* v, v_array& wor { if (words.size() > 2) THROW("shared labels may not have a cost"); - ld->type = CCB::example_type::shared; + ld.type = CCB::example_type::shared; } else if (type == "action") { if (words.size() > 2) THROW("action labels may not have a cost"); - ld->type = CCB::example_type::action; + ld.type = CCB::example_type::action; } else if (type == "slot") { if (words.size() > 4) THROW("ccb slot label can only have a type cost and exclude list"); - ld->type = CCB::example_type::slot; + ld.type = CCB::example_type::slot; // Skip the first two words "ccb " for (size_t i = 2; i < words.size(); i++) @@ -324,12 +293,12 @@ void parse_label(parser* p, shared_data*, void* v, v_array& wor auto is_outcome = words[i].find(':'); if (is_outcome != VW::string_view::npos) { - if (ld->outcome != nullptr) + if (ld.outcome != nullptr) { THROW("There may be only 1 outcome associated with a slot.") } 
- ld->outcome = parse_outcome(words[i]); + ld.outcome = parse_outcome(words[i]); } else { @@ -339,9 +308,9 @@ void parse_label(parser* p, shared_data*, void* v, v_array& wor } // If a full distribution has been given, check if it sums to 1, otherwise throw. - if (ld->outcome && ld->outcome->probabilities.size() > 1) + if (ld.outcome && ld.outcome->probabilities.size() > 1) { - float total_pred = std::accumulate(ld->outcome->probabilities.begin(), ld->outcome->probabilities.end(), 0.f, + float total_pred = std::accumulate(ld.outcome->probabilities.begin(), ld.outcome->probabilities.end(), 0.f, [](float result_so_far, ACTION_SCORE::action_score action_pred) { return result_so_far + action_pred.score; }); @@ -360,6 +329,6 @@ void parse_label(parser* p, shared_data*, void* v, v_array& wor } // Export the definition of this label parser. -label_parser ccb_label_parser = {default_label, parse_label, cache_label, read_cached_label, delete_label, ccb_weight, - copy_label, test_label, sizeof(CCB::label)}; +label_parser ccb_label_parser = {default_label, parse_label, cache_label, read_cached_label, polylabel_delete_label, ccb_weight, + polylabel_copy_label, test_label, sizeof(CCB::label)}; } // namespace CCB diff --git a/vowpalwabbit/ccb_label.h b/vowpalwabbit/ccb_label.h index 8b6e341c4be..d18dd44b602 100644 --- a/vowpalwabbit/ccb_label.h +++ b/vowpalwabbit/ccb_label.h @@ -33,11 +33,67 @@ enum example_type : uint8_t struct label { - example_type type; + example_type type = example_type::unset; // Outcome may be unset. - conditional_contextual_bandit_outcome* outcome; + conditional_contextual_bandit_outcome* outcome = nullptr; v_array explicit_included_actions; - float weight; + float weight = 0.f; + + label() = default; + label(example_type type, conditional_contextual_bandit_outcome* outcome, v_array& explicit_included_actions, + float weight) + : type(type), outcome(outcome), explicit_included_actions(explicit_included_actions), weight(weight) + { + } + + label(label&& other) + { + type = example_type::unset; + std::swap(type, other.type); + outcome = nullptr; + std::swap(outcome, other.outcome); + explicit_included_actions.clear(); + std::swap(explicit_included_actions, other.explicit_included_actions); + weight = 0.f; + std::swap(weight, other.weight); + } + label& operator=(label&& other) + { + type = example_type::unset; + std::swap(type, other.type); + delete outcome; + outcome = nullptr; + std::swap(outcome, other.outcome); + + explicit_included_actions.clear(); + std::swap(explicit_included_actions, other.explicit_included_actions); + + weight = 0.f; + std::swap(weight, other.weight); + + return *this; + } + + label(const label& other) + { + type = other.type; + // Deep-copy the outcome; it may legitimately be nullptr. + outcome = other.outcome == nullptr ? nullptr : new conditional_contextual_bandit_outcome(*other.outcome); + explicit_included_actions = other.explicit_included_actions; + weight = other.weight; + } + + label& operator=(const label& other) + { + type = other.type; + delete outcome; + outcome = other.outcome == nullptr ? nullptr : new conditional_contextual_bandit_outcome(*other.outcome); + explicit_included_actions = other.explicit_included_actions; + weight = other.weight; + return *this; + } + + ~label() { delete outcome; } }; extern label_parser ccb_label_parser; diff --git a/vowpalwabbit/classweight.cc b/vowpalwabbit/classweight.cc index 2728a33916b..1b0a06a757b 100644 --- a/vowpalwabbit/classweight.cc +++ b/vowpalwabbit/classweight.cc @@ -53,10 +53,10 @@ static void predict_or_learn(classweights& cweights, LEARNER::single_learner& ba switch (pred_type) { case 
prediction_type_t::scalar: - ec.weight *= cweights.get_class_weight((uint32_t)ec.l.simple.label); + ec.weight *= cweights.get_class_weight((uint32_t)ec.l.simple().label); break; case prediction_type_t::multiclass: - ec.weight *= cweights.get_class_weight(ec.l.multi.label); + ec.weight *= cweights.get_class_weight(ec.l.multi().label); break; default: // suppress the warning @@ -92,12 +92,21 @@ LEARNER::base_learner* classweight_setup(options_i& options, vw& all) LEARNER::learner* ret; if (base->pred_type == prediction_type_t::scalar) + { ret = &LEARNER::init_learner(cweights, base, predict_or_learn, predict_or_learn); + ret->label_type = label_type_t::simple; + } else if (base->pred_type == prediction_type_t::multiclass) + { ret = &LEARNER::init_learner(cweights, base, predict_or_learn, predict_or_learn); + ret->label_type = label_type_t::multi; + } else + { THROW("--classweight not implemented for this type of prediction"); + } + return make_base(*ret); } diff --git a/vowpalwabbit/comp_io.cc b/vowpalwabbit/comp_io.cc index cd784d0180f..14f93d7c1a1 100644 --- a/vowpalwabbit/comp_io.cc +++ b/vowpalwabbit/comp_io.cc @@ -6,6 +6,12 @@ #include "zlib.h" #include "comp_io.h" +// comp_io_buf needs to override this because the default destructor checks for stdin by file descriptor, and the file descriptor used by zlib collides with it. +comp_io_buf::~comp_io_buf() +{ + while (comp_io_buf::close_file()); +} + int comp_io_buf::open_file(const char* name, bool stdin_off, int flag) { gzFile fil = nullptr; diff --git a/vowpalwabbit/comp_io.h b/vowpalwabbit/comp_io.h index 45d1a17a18a..4bb7b8e3264 100644 --- a/vowpalwabbit/comp_io.h +++ b/vowpalwabbit/comp_io.h @@ -20,6 +20,8 @@ class comp_io_buf : public io_buf public: std::vector gz_files; + ~comp_io_buf() override; + int open_file(const char* name, bool stdin_off, int flag) override; void reset_file(int f) override; diff --git a/vowpalwabbit/conditional_contextual_bandit.cc b/vowpalwabbit/conditional_contextual_bandit.cc index f34618c4717..82aa98b74e1 100644 --- a/vowpalwabbit/conditional_contextual_bandit.cc +++ b/vowpalwabbit/conditional_contextual_bandit.cc @@ -12,7 +12,6 @@ #include "cb_adf.h" #include "cb_algs.h" #include "constant.h" -#include "v_array_pool.h" #include #include @@ -23,13 +22,6 @@ using namespace LEARNER; using namespace VW; using namespace VW::config; -template -void return_v_array(v_array& array, VW::v_array_pool& pool) -{ - array.clear(); - pool.return_object(array); -} - struct ccb { vw* all; @@ -51,9 +43,6 @@ struct ccb std::string id_namespace_str; size_t base_learner_stride_shift; - - VW::v_array_pool cb_label_pool; - VW::v_array_pool action_score_pool; }; namespace CCB { @@ -76,7 +65,7 @@ bool split_multi_example_and_stash_labels(const multi_ex& examples, ccb& data) { for (auto ex : examples) { - switch (ex->l.conditional_contextual_bandit.type) + switch (ex->l.ccb().type) { case example_type::shared: data.shared = ex; @@ -93,8 +82,9 @@ bool split_multi_example_and_stash_labels(const multi_ex& examples, ccb& data) } // Stash the CCB labels before rewriting them. - data.stored_labels.push_back({ex->l.conditional_contextual_bandit.type, ex->l.conditional_contextual_bandit.outcome, - ex->l.conditional_contextual_bandit.explicit_included_actions, 0.}); + data.stored_labels.push_back(std::move(ex->l.ccb())); + // Since we have just moved out of the label, we should reset it to avoid using garbage memory. 
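// ---------------------------------------------------------------------------
// [Editorial sketch, not part of this patch] The move-then-reset discipline
// applied here, in miniature: moving out of an object leaves it in a valid
// but unspecified state, so the code resets it explicitly before anything can
// read it again. The type below is illustrative only, not VW's polylabel:
#include <utility>
#include <vector>

struct label_sketch { std::vector<int> costs; };

void stash_label(std::vector<label_sketch>& stored, label_sketch& lbl)
{
  stored.push_back(std::move(lbl));  // lbl is now in a moved-from state
  lbl = label_sketch{};              // reset so later readers never see stale data
}
// ---------------------------------------------------------------------------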
+ ex->l.reset(); } return true; @@ -112,10 +102,11 @@ bool sanity_checks(ccb& data) if (is_learn) { - for (auto slot : data.slots) + auto first_slot_index = 1 /*shared*/ + data.actions.size(); + for (size_t index = first_slot_index; index < data.stored_labels.size(); index++) { - if (slot->l.conditional_contextual_bandit.outcome != nullptr && - slot->l.conditional_contextual_bandit.outcome->probabilities.size() == 0) + const auto& slot_label = data.stored_labels[index]; + if (slot_label.outcome != nullptr && slot_label.outcome->probabilities.size() == 0) { std::cerr << "ccb_adf_explore: badly formatted example - missing label probability"; return false; @@ -128,23 +119,23 @@ bool sanity_checks(ccb& data) // create empty/default cb labels void create_cb_labels(ccb& data) { - data.shared->l.cb.costs = data.cb_label_pool.get_object(); - data.shared->l.cb.costs.push_back(data.default_cb_label); + data.shared->l.init_as_cb(); + data.shared->l.cb().costs.push_back(data.default_cb_label); for (example* action : data.actions) { - action->l.cb.costs = data.cb_label_pool.get_object(); + action->l.reset(); + action->l.init_as_cb(); } - data.shared->l.cb.weight = 1.0; + data.shared->l.cb().weight = 1.0; } // the polylabel (union) must be manually cleaned up void delete_cb_labels(ccb& data) { - return_v_array(data.shared->l.cb.costs, data.cb_label_pool); - + data.shared->l.reset(); for (example* action : data.actions) { - return_v_array(action->l.cb.costs, data.cb_label_pool); + action->l.reset(); } } @@ -157,14 +148,13 @@ void attach_label_to_example( data.cb_label.probability = outcome->probabilities[0].score; data.cb_label.cost = outcome->cost; - example->l.cb.costs.push_back(data.cb_label); + example->l.cb().costs.push_back(data.cb_label); } void save_action_scores(ccb& data, decision_scores_t& decision_scores) { - auto& pred = data.shared->pred.a_s; - decision_scores.push_back(pred); - + decision_scores.push_back(std::move(data.shared->pred.action_probs())); + auto& pred = decision_scores[decision_scores.size() - 1]; // correct indices: we want index relative to the original ccb multi-example, with no actions filtered for (auto& action_score : pred) { @@ -181,7 +171,7 @@ void clear_pred_and_label(ccb& data) // Don't need to return to pool, as that will be done when the example is output. // This just needs to be cleared as it is reused. - data.actions[data.action_with_label]->l.cb.costs.clear(); + data.actions[data.action_with_label]->l.cb().costs.clear(); } // true if there exists at least 1 action in the cb multi-example @@ -321,9 +311,9 @@ void calculate_and_insert_interactions( // build a cb example from the ccb example template -void build_cb_example(multi_ex& cb_ex, example* slot, ccb& data) +void build_cb_example(multi_ex& cb_ex, example* slot, CCB::label& slot_label, ccb& data) { - bool slot_has_label = slot->l.conditional_contextual_bandit.outcome != nullptr; + bool slot_has_label = slot_label.outcome != nullptr; // Merge the slot features with the shared example and set it in the cb multi-example // TODO is it important for total_sum_feat_sq and num_features to be correct at this point? 
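// ---------------------------------------------------------------------------
// [Editorial sketch, not part of this patch] What the TODO above is asking
// about: when one example's features are merged into another, the cached
// counters should stay consistent with the concatenated feature set. The
// layout below is a simplified stand-in, not VW's example type:
#include <vector>

struct feature_group_sketch
{
  std::vector<float> values;
  float sum_feat_sq = 0.f;  // cached sum of squared feature values
};

void merge_features(feature_group_sketch& dst, const feature_group_sketch& src)
{
  for (float v : src.values)
  {
    dst.values.push_back(v);
    dst.sum_feat_sq += v * v;  // keep the cache in sync with the merged set
  }
}
// ---------------------------------------------------------------------------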
@@ -331,7 +321,7 @@ void build_cb_example(multi_ex& cb_ex, example* slot, ccb& data) cb_ex.push_back(data.shared); // Retrieve the action index whitelist (if the list is empty, then all actions are white-listed) - auto& explicit_includes = slot->l.conditional_contextual_bandit.explicit_included_actions; + auto& explicit_includes = slot_label.explicit_included_actions; if (explicit_includes.size() != 0) { // First time seeing this, initialize the vector with falses so we can start setting each included action. @@ -367,17 +357,20 @@ void build_cb_example(multi_ex& cb_ex, example* slot, ccb& data) data.origin_index[index++] = (uint32_t)i; // Remember the index of the chosen action - if (is_learn && slot_has_label && i == slot->l.conditional_contextual_bandit.outcome->probabilities[0].action) + if (is_learn && slot_has_label && i == slot_label.outcome->probabilities[0].action) { // This is used to remove the label later. data.action_with_label = (uint32_t)i; - attach_label_to_example(index, data.actions[i], slot->l.conditional_contextual_bandit.outcome, data); + attach_label_to_example(index, data.actions[i], slot_label.outcome, data); } } - // Must provide a prediction that cb can write into, this will be saved into the decision scores object later. - data.shared->pred.a_s = data.action_score_pool.get_object(); - + for (auto example : cb_ex) + { + example->pred.reset(); + example->pred.init_as_action_probs(); + } + // Tag can be used for specifying the sampling seed per slot. For it to be used it must be inserted into the shared // example. std::swap(data.shared->tag, slot->tag); @@ -389,6 +382,7 @@ template void learn_or_predict(ccb& data, multi_learner& base, multi_ex& examples) { clear_all(data); + data.stored_labels.reserve(examples.size()); if (!split_multi_example_and_stash_labels(examples, data)) // split shared, actions and slots return; @@ -403,7 +397,7 @@ void learn_or_predict(ccb& data, multi_learner& base, multi_ex& examples) // Reset exclusion list for this example. data.exclude_list.assign(data.actions.size(), false); - auto decision_scores = examples[0]->pred.decision_scores; + auto decision_scores = std::move(examples[0]->pred.decision_scores()); // for each slot, re-build the cb example and call cb_explore_adf size_t slot_id = 0; @@ -420,8 +414,9 @@ void learn_or_predict(ccb& data, multi_learner& base, multi_ex& examples) ex->interactions = &data.generated_interactions; } + const auto example_index = examples.size() - data.slots.size() + slot_id; data.include_list.clear(); - build_cb_example(data.cb_ex, slot, data); + build_cb_example(data.cb_ex, slot, data.stored_labels[example_index], data); if (data.all->audit) inject_slot_id(data, data.shared, slot_id); @@ -438,7 +433,7 @@ void learn_or_predict(ccb& data, multi_learner& base, multi_ex& examples) else { // the cb example contains no action => cannot decide - decision_scores.push_back(data.action_score_pool.get_object()); + decision_scores.push_back(ACTION_SCORE::action_scores()); } data.shared->interactions = data.original_interactions; @@ -464,12 +459,13 @@ void learn_or_predict(ccb& data, multi_learner& base, multi_ex& examples) // Restore ccb labels to the example objects. 
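// ---------------------------------------------------------------------------
// [Editorial sketch, not part of this patch] The stash/restore round trip used
// here, reduced to its essentials: labels are moved out before the inner
// learner rewrites them and moved back afterwards. Illustrative helper only,
// not VW's API:
#include <cstddef>
#include <utility>
#include <vector>

template <typename Label>
void restore_labels(std::vector<Label>& stash, std::vector<Label*>& slots)
{
  // Move each stashed label back into the slot it came from, then drop the stash.
  for (size_t i = 0; i < slots.size(); i++) *slots[i] = std::move(stash[i]);
  stash.clear();
}
// ---------------------------------------------------------------------------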
for (size_t i = 0; i < examples.size(); i++) { - examples[i]->l.conditional_contextual_bandit = { - data.stored_labels[i].type, data.stored_labels[i].outcome, data.stored_labels[i].explicit_included_actions, 0.}; + examples[i]->l.init_as_ccb(std::move(data.stored_labels[i])); } + data.stored_labels.clear(); // Save the predictions - examples[0]->pred.decision_scores = decision_scores; + examples[0]->pred.reset(); + examples[0]->pred.init_as_decision_scores(std::move(decision_scores)); } void print_decision_scores(int f, decision_scores_t& decision_scores) @@ -505,7 +501,7 @@ void print_update(vw& all, std::vector& slots, decision_scores_t& deci { counter++; - auto outcome = slot->l.conditional_contextual_bandit.outcome; + auto outcome = slot->l.ccb().outcome; if (outcome == nullptr) { label_str += delim; @@ -572,7 +568,7 @@ void output_example(vw& all, ccb& /*c*/, multi_ex& ec_seq) { num_features += ec->num_features; - if (ec->l.conditional_contextual_bandit.type == CCB::example_type::slot) + if (ec->l.ccb().type == CCB::example_type::slot) { slots.push_back(ec); } @@ -580,10 +576,10 @@ void output_example(vw& all, ccb& /*c*/, multi_ex& ec_seq) // Is it hold out? size_t num_labelled = 0; - auto preds = ec_seq[0]->pred.decision_scores; + auto& preds = ec_seq[0]->pred.decision_scores(); for (size_t i = 0; i < slots.size(); i++) { - auto outcome = slots[i]->l.conditional_contextual_bandit.outcome; + auto outcome = slots[i]->l.ccb().outcome; if (outcome != nullptr) { num_labelled++; @@ -605,7 +601,7 @@ void output_example(vw& all, ccb& /*c*/, multi_ex& ec_seq) all.sd->update(holdout_example, num_labelled > 0, loss, ec_seq[SHARED_EX_INDEX]->weight, num_features); for (auto sink : all.final_prediction_sink) - print_decision_scores(sink, ec_seq[SHARED_EX_INDEX]->pred.decision_scores); + print_decision_scores(sink, ec_seq[SHARED_EX_INDEX]->pred.decision_scores()); CCB::print_update(all, slots, preds, num_features); } @@ -618,12 +614,6 @@ void finish_multiline_example(vw& all, ccb& data, multi_ex& ec_seq) CB_ADF::global_print_newline(all.final_prediction_sink); } - for (auto& a_s : ec_seq[0]->pred.decision_scores) - { - return_v_array(a_s, data.action_score_pool); - } - ec_seq[0]->pred.decision_scores.clear(); - VW::finish_example(all, ec_seq); } @@ -663,7 +653,6 @@ base_learner* ccb_explore_adf_setup(options_i& options, vw& all) auto base = as_multiline(setup_base(options, all)); all.p->lp = CCB::ccb_label_parser; - all.label_type = label_type_t::ccb; // Stash the base learners stride_shift so we can properly add a feature later. 
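// ---------------------------------------------------------------------------
// The restore step above is a move rather than a field-wise struct copy:
// init_as_ccb takes ownership of the stashed CCB::label, and the prediction is
// handed back the same way through init_as_decision_scores. A condensed sketch
// of the stash/restore round trip, assuming stored_labels holds CCB::label by
// value as its use above implies:
//
//   data.stored_labels.push_back(std::move(examples[i]->l.ccb()));  // stash
//   examples[i]->l.reset();           // slot gets reused for temporary CB labels
//   // ... run the base cb_explore_adf learner once per slot ...
//   examples[i]->l.init_as_ccb(std::move(data.stored_labels[i]));   // re-tag and move back
// ---------------------------------------------------------------------------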
data->base_learner_stride_shift = all.weights.stride_shift(); @@ -679,13 +668,15 @@ base_learner* ccb_explore_adf_setup(options_i& options, vw& all) data->id_namespace_hash = VW::hash_space(all, data->id_namespace_str); learner& l = - init_learner(data, base, learn_or_predict, learn_or_predict, 1, prediction_type_t::decision_probs); - - all.delete_prediction = ACTION_SCORE::delete_action_scores; - + init_learner(data, base, learn_or_predict, learn_or_predict, 1, prediction_type_t::decision_scores); + l.label_type = label_type_t::conditional_contextual_bandit; l.set_finish_example(finish_multiline_example); return make_base(l); } -bool ec_is_example_header(example const& ec) { return ec.l.conditional_contextual_bandit.type == example_type::shared; } +bool ec_is_example_header(example const& ec) +{ + return ec.l.get_type() == label_type_t::conditional_contextual_bandit && + ec.l.ccb().type == example_type::shared; +} } // namespace CCB diff --git a/vowpalwabbit/confidence.cc b/vowpalwabbit/confidence.cc index 591d849b88c..831c6f47cbc 100644 --- a/vowpalwabbit/confidence.cc +++ b/vowpalwabbit/confidence.cc @@ -21,20 +21,20 @@ void predict_or_learn_with_confidence(confidence& /* c */, single_learner& base, float threshold = 0.f; float sensitivity = 0.f; - float existing_label = ec.l.simple.label; + float existing_label = ec.l.simple().label; if (existing_label == FLT_MAX) { base.predict(ec); float opposite_label = 1.f; - if (ec.pred.scalar > 0) + if (ec.pred.scalar() > 0) opposite_label = -1.f; - ec.l.simple.label = opposite_label; + ec.l.simple().label = opposite_label; } if (!is_confidence_after_training) sensitivity = base.sensitivity(ec); - ec.l.simple.label = existing_label; + ec.l.simple().label = existing_label; if (is_learn) base.learn(ec); else @@ -43,7 +43,7 @@ void predict_or_learn_with_confidence(confidence& /* c */, single_learner& base, if (is_confidence_after_training) sensitivity = base.sensitivity(ec); - ec.confidence = fabsf(ec.pred.scalar - threshold) / sensitivity; + ec.confidence = fabsf(ec.pred.scalar() - threshold) / sensitivity; } void confidence_print_result(int f, float res, float confidence, v_array tag) @@ -64,7 +64,7 @@ void confidence_print_result(int f, float res, float confidence, v_array t void output_and_account_confidence_example(vw& all, example& ec) { - label_data& ld = ec.l.simple; + label_data& ld = ec.l.simple(); all.sd->update(ec.test_only, ld.label != FLT_MAX, ec.loss, ec.weight, ec.num_features); if (ld.label != FLT_MAX && !ec.test_only) @@ -75,7 +75,7 @@ void output_and_account_confidence_example(vw& all, example& ec) for (size_t i = 0; i < all.final_prediction_sink.size(); i++) { int f = (int)all.final_prediction_sink[i]; - confidence_print_result(f, ec.pred.scalar, ec.confidence, ec.tag); + confidence_print_result(f, ec.pred.scalar(), ec.confidence, ec.tag); } print_update(all, ec); @@ -130,6 +130,7 @@ base_learner* confidence_setup(options_i& options, vw& all) data, as_singleline(setup_base(options, all)), learn_with_confidence_ptr, predict_with_confidence_ptr); l.set_finish_example(return_confidence_example); + l.label_type = label_type_t::simple; return make_base(l); } diff --git a/vowpalwabbit/cost_sensitive.cc b/vowpalwabbit/cost_sensitive.cc index 9df12d8f461..86afc2600ab 100644 --- a/vowpalwabbit/cost_sensitive.cc +++ b/vowpalwabbit/cost_sensitive.cc @@ -31,10 +31,10 @@ void name_value(VW::string_view& s, v_array& name, float& v) } } -char* bufread_label(label* ld, char* c, io_buf& cache) +char* bufread_label(label& ld, char* c, io_buf& 
cache) { size_t num = *(size_t*)c; - ld->costs.clear(); + ld.costs.clear(); c += sizeof(size_t); size_t total = sizeof(wclass) * num; if (cache.buf_read(c, (int)total) < total) @@ -46,16 +46,17 @@ char* bufread_label(label* ld, char* c, io_buf& cache) { wclass temp = *(wclass*)c; c += sizeof(wclass); - ld->costs.push_back(temp); + ld.costs.push_back(temp); } return c; } -size_t read_cached_label(shared_data*, void* v, io_buf& cache) +size_t read_cached_label(shared_data*, polylabel& v, io_buf& cache) { - label* ld = (label*)v; - ld->costs.clear(); + auto& ld = v.cs(); + + ld.costs.clear(); char* c; size_t total = sizeof(size_t); if (cache.buf_read(c, (int)total) < total) @@ -65,66 +66,57 @@ size_t read_cached_label(shared_data*, void* v, io_buf& cache) return total; } -float weight(void*) { return 1.; } +float weight(polylabel&) { return 1.; } -char* bufcache_label(label* ld, char* c) +char* bufcache_label(label& ld, char* c) { - *(size_t*)c = ld->costs.size(); + *(size_t*)c = ld.costs.size(); c += sizeof(size_t); - for (unsigned int i = 0; i < ld->costs.size(); i++) + for (unsigned int i = 0; i < ld.costs.size(); i++) { - *(wclass*)c = ld->costs[i]; + *(wclass*)c = ld.costs[i]; c += sizeof(wclass); } return c; } -void cache_label(void* v, io_buf& cache) +void cache_label(polylabel& v, io_buf& cache) { char* c; - label* ld = (label*)v; - cache.buf_write(c, sizeof(size_t) + sizeof(wclass) * ld->costs.size()); + auto& ld = v.cs(); + cache.buf_write(c, sizeof(size_t) + sizeof(wclass) * ld.costs.size()); bufcache_label(ld, c); } -void default_label(void* v) +void default_label(label& label) { label.costs.clear(); } + +void default_label(polylabel& v) { - label* ld = (label*)v; - ld->costs.clear(); + if (v.get_type() != label_type_t::cs) + { + v.reset(); + v.init_as_cs(); + } + + auto& ld = v.cs(); + default_label(ld); } -bool test_label(void* v) +bool test_label(polylabel& v) { - label* ld = (label*)v; - if (ld->costs.size() == 0) + auto& ld = v.cs(); + if (ld.costs.size() == 0) return true; - for (unsigned int i = 0; i < ld->costs.size(); i++) - if (FLT_MAX != ld->costs[i].x) + for (unsigned int i = 0; i < ld.costs.size(); i++) + if (FLT_MAX != ld.costs[i].x) return false; return true; } -void delete_label(void* v) +void parse_label(parser* p, shared_data* sd, polylabel& v, v_array& words) { - label* ld = (label*)v; - if (ld) - ld->costs.delete_v(); -} - -void copy_label(void* dst, void* src) -{ - if (dst && src) - { - label* ldD = (label*)dst; - label* ldS = (label*)src; - copy_array(ldD->costs, ldS->costs); - } -} - -void parse_label(parser* p, shared_data* sd, void* v, v_array& words) -{ - label* ld = (label*)v; - ld->costs.clear(); + auto& ld = v.cs(); + ld.costs.clear(); // handle shared and label first if (words.size() == 1) @@ -147,7 +139,7 @@ void parse_label(parser* p, shared_data* sd, void* v, v_array& else { wclass f = {-FLT_MAX, 0, 0., 0.}; - ld->costs.push_back(f); + ld.costs.push_back(f); } } if (eq_label) @@ -157,7 +149,7 @@ void parse_label(parser* p, shared_data* sd, void* v, v_array& else { wclass f = {float_of_string(p->parse_name[1]), 0, 0., 0.}; - ld->costs.push_back(f); + ld.costs.push_back(f); } } return; @@ -183,12 +175,12 @@ void parse_label(parser* p, shared_data* sd, void* v, v_array& else THROW("malformed cost specification on '" << (p->parse_name[0]) << "'"); - ld->costs.push_back(f); + ld.costs.push_back(f); } } -label_parser cs_label = {default_label, parse_label, cache_label, read_cached_label, delete_label, weight, copy_label, - test_label, sizeof(label)}; 
+label_parser cs_label = {default_label, parse_label, cache_label, read_cached_label, polylabel_delete_label, weight, + polylabel_copy_label, test_label, sizeof(label)}; void print_update(vw& all, bool is_test, example& ec, multi_ex* ec_seq, bool action_scores, uint32_t prediction) { @@ -218,12 +210,12 @@ void print_update(vw& all, bool is_test, example& ec, multi_ex* ec_seq, bool act if (all.sd->ldict) { if (action_scores) - pred_buf << all.sd->ldict->get(ec.pred.a_s[0].action); + pred_buf << all.sd->ldict->get(ec.pred.action_scores()[0].action); else pred_buf << all.sd->ldict->get(prediction); } else - pred_buf << ec.pred.a_s[0].action; + pred_buf << ec.pred.action_scores()[0].action; if (action_scores) pred_buf << "....."; all.sd->print_update(all.holdout_set_off, all.current_pass, label_buf, pred_buf.str(), num_current_features, @@ -238,13 +230,13 @@ void print_update(vw& all, bool is_test, example& ec, multi_ex* ec_seq, bool act void output_example(vw& all, example& ec) { - label& ld = ec.l.cs; + label& ld = ec.l.cs(); float loss = 0.; - if (!test_label(&ld)) + if (!test_label(ec.l)) { // need to compute exact loss - size_t pred = (size_t)ec.pred.multiclass; + size_t pred = (size_t)ec.pred.multiclass(); float chosen_loss = FLT_MAX; float min = FLT_MAX; @@ -264,14 +256,14 @@ void output_example(vw& all, example& ec) // loss = chosen_loss; } - all.sd->update(ec.test_only, !test_label(&ld), loss, ec.weight, ec.num_features); + all.sd->update(ec.test_only, !test_label(ec.l), loss, ec.weight, ec.num_features); for (int sink : all.final_prediction_sink) if (!all.sd->ldict) - all.print_by_ref(sink, (float)ec.pred.multiclass, 0, ec.tag); + all.print_by_ref(sink, (float)ec.pred.multiclass(), 0, ec.tag); else { - VW::string_view sv_pred = all.sd->ldict->get(ec.pred.multiclass); + VW::string_view sv_pred = all.sd->ldict->get(ec.pred.multiclass()); all.print_text_by_ref(sink, sv_pred.to_string(), ec.tag); } @@ -288,7 +280,7 @@ void output_example(vw& all, example& ec) all.print_text_by_ref(all.raw_prediction, outputStringStream.str(), ec.tag); } - print_update(all, test_label(&ec.l.cs), ec, nullptr, false, ec.pred.multiclass); + print_update(all, test_label(ec.l), ec, nullptr, false, ec.pred.multiclass()); } void finish_example(vw& all, example& ec) @@ -299,7 +291,7 @@ void finish_example(vw& all, example& ec) bool example_is_test(example& ec) { - v_array costs = ec.l.cs.costs; + auto& costs = ec.l.cs().costs; if (costs.size() == 0) return true; for (size_t j = 0; j < costs.size(); j++) @@ -310,13 +302,18 @@ bool example_is_test(example& ec) bool ec_is_example_header(example const& ec) // example headers look like "shared" { - v_array costs = ec.l.cs.costs; - if (costs.size() != 1) - return false; - if (costs[0].class_index != 0) - return false; - if (costs[0].x != -FLT_MAX) - return false; - return true; + if (ec.l.get_type() == label_type_t::cs) + { + auto& costs = ec.l.cs().costs; + if (costs.size() != 1) + return false; + if (costs[0].class_index != 0) + return false; + if (costs[0].x != -FLT_MAX) + return false; + return true; + } + + return false; } } // namespace COST_SENSITIVE diff --git a/vowpalwabbit/cost_sensitive.h b/vowpalwabbit/cost_sensitive.h index bf216e6c2ea..14f1a45f71b 100644 --- a/vowpalwabbit/cost_sensitive.h +++ b/vowpalwabbit/cost_sensitive.h @@ -29,6 +29,9 @@ struct label v_array costs; }; +void delete_label(label& label); +void default_label(label& label); + void output_example(vw& all, example& ec); void finish_example(vw& all, example& ec); template diff --git 
a/vowpalwabbit/cs_active.cc b/vowpalwabbit/cs_active.cc index 804ec1c0879..786e3617285 100644 --- a/vowpalwabbit/cs_active.cc +++ b/vowpalwabbit/cs_active.cc @@ -61,8 +61,6 @@ struct cs_active size_t labels_outside_range; float distance_to_range; float range; - - ~cs_active() { examples_by_queries.delete_v(); } }; float binarySearch(float fhat, float delta, float sens, float tol) @@ -98,18 +96,18 @@ inline void inner_loop(cs_active& cs_a, single_learner& base, example& ec, uint3 if (is_learn) { vw& all = *cs_a.all; - ec.l.simple.weight = 1.; + ec.l.simple().weight = 1.; ec.weight = 1.; if (is_simulation) { // In simulation mode if (query_this_label) { - ec.l.simple.label = cost; + ec.l.simple().label = cost; all.sd->queries += 1; } else - ec.l.simple.label = FLT_MAX; + ec.l.simple().label = FLT_MAX; } else { @@ -118,16 +116,16 @@ inline void inner_loop(cs_active& cs_a, single_learner& base, example& ec, uint3 // If the cost of this label was not queried, then skip it. if (query_needed) { - ec.l.simple.label = cost; + ec.l.simple().label = cost; if ((cost < cs_a.cost_min) || (cost > cs_a.cost_max)) cerr << "warning: cost " << cost << " outside of cost range [" << cs_a.cost_min << ", " << cs_a.cost_max << "]!" << endl; } else - ec.l.simple.label = FLT_MAX; + ec.l.simple().label = FLT_MAX; } - if (ec.l.simple.label != FLT_MAX) + if (ec.l.simple().label != FLT_MAX) base.learn(ec, i - 1); } else if (!is_simulation) @@ -164,10 +162,10 @@ inline void find_cost_range(cs_active& cs_a, single_learner& base, example& ec, else { // finding max_pred and min_pred by binary search - max_pred = - std::min(ec.pred.scalar + sens * binarySearch(cs_a.cost_max - ec.pred.scalar, delta, sens, tol), cs_a.cost_max); - min_pred = - std::max(ec.pred.scalar - sens * binarySearch(ec.pred.scalar - cs_a.cost_min, delta, sens, tol), cs_a.cost_min); + max_pred = std::min( + ec.pred.scalar() + sens * binarySearch(cs_a.cost_max - ec.pred.scalar(), delta, sens, tol), cs_a.cost_max); + min_pred = std::max( + ec.pred.scalar() - sens * binarySearch(ec.pred.scalar() - cs_a.cost_min, delta, sens, tol), cs_a.cost_min); is_range_large = (max_pred - min_pred > eta); if (cs_a.print_debug_stuff) cerr << " find_cost_rangeB: i=" << i << " pp=" << ec.partial_prediction << " sens=" << sens << " eta=" << eta @@ -179,7 +177,7 @@ template void predict_or_learn(cs_active& cs_a, single_learner& base, example& ec) { // cerr << "------------- passthrough" << endl; - COST_SENSITIVE::label ld = ec.l.cs; + COST_SENSITIVE::label ld = std::move(ec.l.cs()); // cerr << "is_learn=" << is_learn << " ld.costs.size()=" << ld.costs.size() << endl; if (cs_a.all->sd->queries >= cs_a.min_labels * cs_a.num_classes) @@ -215,7 +213,10 @@ void predict_or_learn(cs_active& cs_a, single_learner& base, example& ec) uint32_t prediction = 1; float score = FLT_MAX; - ec.l.simple = {0., 0., 0.}; + ec.l.reset(); + ec.l.init_as_simple(); + ec.pred.reset(); + ec.pred.init_as_scalar(); float min_max_cost = FLT_MAX; float t = (float)cs_a.t; // ec.example_t; // current round @@ -269,7 +270,7 @@ void predict_or_learn(cs_active& cs_a, single_learner& base, example& ec) inner_loop(cs_a, base, ec, lqd.cl.class_index, lqd.cl.x, prediction, score, lqd.cl.partial_prediction, query_label, lqd.query_needed); if (lqd.query_needed) - ec.pred.multilabels.label_v.push_back(lqd.cl.class_index); + ec.pred.multilabels().label_v.push_back(lqd.cl.class_index); if (cs_a.print_debug_stuff) cerr << "label=" << lqd.cl.class_index << " x=" << lqd.cl.x << " prediction=" << prediction << " score=" << 
score << " pp=" << lqd.cl.partial_prediction << " ql=" << query_label @@ -279,7 +280,7 @@ void predict_or_learn(cs_active& cs_a, single_learner& base, example& ec) } // Need to pop metadata - cs_a.query_data.delete_v(); + cs_a.query_data.clear(); if (cs_a.all->sd->queries - queries > 0) cs_a.num_any_queries++; @@ -304,11 +305,13 @@ void predict_or_learn(cs_active& cs_a, single_learner& base, example& ec) } } - ec.pred.multiclass = prediction; - ec.l.cs = ld; + ec.pred.reset(); + ec.pred.init_as_multiclass() = prediction; + ec.l.reset(); + ec.l.init_as_cs(std::move(ld)); } -void finish_example(vw& all, cs_active& cs_a, example& ec) { CSOAA::finish_example(all, *(CSOAA::csoaa*)&cs_a, ec); } +void finish_example(vw& all, cs_active&, example& ec) { CSOAA::finish_example(all, ec); } base_learner* cs_active_setup(options_i& options, vw& all) { @@ -372,12 +375,13 @@ base_learner* cs_active_setup(options_i& options, vw& all) learner& l = simulation ? init_learner(data, as_singleline(setup_base(options, all)), predict_or_learn, - predict_or_learn, data->num_classes, prediction_type_t::multilabels) + predict_or_learn, data->num_classes, prediction_type_t::multiclass) : init_learner(data, as_singleline(setup_base(options, all)), predict_or_learn, - predict_or_learn, data->num_classes, prediction_type_t::multilabels); + predict_or_learn, data->num_classes, prediction_type_t::multiclass); l.set_finish_example(finish_example); base_learner* b = make_base(l); + l.label_type = label_type_t::cs; all.cost_sensitive = b; return b; } diff --git a/vowpalwabbit/csoaa.cc b/vowpalwabbit/csoaa.cc index a0d363c3b0b..aab6125e43f 100644 --- a/vowpalwabbit/csoaa.cc +++ b/vowpalwabbit/csoaa.cc @@ -22,8 +22,7 @@ namespace CSOAA struct csoaa { uint32_t num_classes; - polyprediction* pred; - ~csoaa() { free(pred); } + std::vector pred; }; template @@ -33,7 +32,7 @@ inline void inner_loop(single_learner& base, example& ec, uint32_t i, float cost if (is_learn) { ec.weight = (cost == FLT_MAX) ? 0.f : 1.f; - ec.l.simple.label = cost; + ec.l.simple().label = cost; base.learn(ec, i - 1); } else @@ -54,11 +53,15 @@ template void predict_or_learn(csoaa& c, single_learner& base, example& ec) { // std::cerr << "------------- passthrough" << std::endl; - COST_SENSITIVE::label ld = ec.l.cs; + COST_SENSITIVE::label ld = std::move(ec.l.cs()); uint32_t prediction = 1; float score = FLT_MAX; size_t pt_start = ec.passthrough ? 
ec.passthrough->size() : 0; - ec.l.simple = {0., 0., 0.}; + + ec.l.reset(); + ec.l.init_as_simple(); + ec.pred.reset(); + ec.pred.init_as_scalar(); if (!ld.costs.empty()) { for (auto& cl : ld.costs) @@ -67,15 +70,15 @@ void predict_or_learn(csoaa& c, single_learner& base, example& ec) } else if (DO_MULTIPREDICT && !is_learn) { - ec.l.simple = {FLT_MAX, 0.f, 0.f}; - base.multipredict(ec, 0, c.num_classes, c.pred, false); + ec.l.simple() = {FLT_MAX, 0.f, 0.f}; + base.multipredict(ec, 0, c.num_classes, c.pred.data(), false); for (uint32_t i = 1; i <= c.num_classes; i++) { - add_passthrough_feature(ec, i, c.pred[i - 1].scalar); - if (c.pred[i - 1].scalar < c.pred[prediction - 1].scalar) + add_passthrough_feature(ec, i, c.pred[i - 1].scalar()); + if (c.pred[i - 1].scalar() < c.pred[prediction - 1].scalar()) prediction = i; } - ec.partial_prediction = c.pred[prediction - 1].scalar; + ec.partial_prediction = c.pred[prediction - 1].scalar(); } else { @@ -105,11 +108,15 @@ void predict_or_learn(csoaa& c, single_learner& base, example& ec) add_passthrough_feature(ec, constant * 3, 1.); } - ec.pred.multiclass = prediction; - ec.l.cs = ld; + ec.pred.reset(); + ec.pred.init_as_multiclass() = prediction; + ec.l.reset(); + ec.l.init_as_cs(std::move(ld)); } void finish_example(vw& all, csoaa&, example& ec) { COST_SENSITIVE::finish_example(all, ec); } +void finish_example(vw& all, example& ec) { COST_SENSITIVE::finish_example(all, ec); } + base_learner* csoaa_setup(options_i& options, vw& all) { @@ -121,14 +128,18 @@ base_learner* csoaa_setup(options_i& options, vw& all) if (!options.was_supplied("csoaa")) return nullptr; - c->pred = calloc_or_throw(c->num_classes); + c->pred.resize(c->num_classes); + for (auto& pred : c->pred) + { + pred.init_as_scalar(); + } learner& l = init_learner(c, as_singleline(setup_base(*all.options, all)), predict_or_learn, predict_or_learn, c->num_classes, prediction_type_t::multiclass); all.p->lp = cs_label; - all.label_type = label_type_t::cs; l.set_finish_example(finish_example); + l.label_type = label_type_t::cs; all.cost_sensitive = make_base(l); return all.cost_sensitive; } @@ -153,12 +164,6 @@ struct ldf uint64_t ft_offset; v_array stored_preds; - - ~ldf() - { - a_s.delete_v(); - stored_preds.delete_v(); - } }; bool ec_is_label_definition(example& ec) // label defs look like "0:___" or just "label:___" @@ -167,7 +172,7 @@ bool ec_is_label_definition(example& ec) // label defs look like "0:___" or jus return false; if (ec.indices[0] != 'l') return false; - const auto& costs = ec.l.cs.costs; + const auto& costs = ec.l.cs().costs; for (auto const& cost : costs) if ((cost.class_index != 0) || (cost.x <= 0.)) return false; @@ -246,14 +251,18 @@ void unsubtract_example(example* ec) void make_single_prediction(ldf& data, single_learner& base, example& ec) { - COST_SENSITIVE::label ld = ec.l.cs; + COST_SENSITIVE::label ld = std::move(ec.l.cs()); label_data simple_label; - simple_label.initial = 0.; simple_label.label = FLT_MAX; LabelDict::add_example_namespace_from_memory(data.label_features, ec, ld.costs[0].class_index); - ec.l.simple = simple_label; + ec.l.reset(); + ec.l.init_as_simple(simple_label); + + ec.pred.reset(); + ec.pred.init_as_scalar(0.f); + uint64_t old_offset = ec.ft_offset; ec.ft_offset = data.ft_offset; base.predict(ec); // make a prediction @@ -261,7 +270,8 @@ void make_single_prediction(ldf& data, single_learner& base, example& ec) ld.costs[0].partial_prediction = ec.partial_prediction; LabelDict::del_example_namespace_from_memory(data.label_features, ec, 
ld.costs[0].class_index); - ec.l.cs = ld; + ec.l.reset(); + ec.l.init_as_cs(std::move(ld)); } bool test_ldf_sequence(ldf& data, multi_ex& ec_seq) @@ -270,13 +280,13 @@ bool test_ldf_sequence(ldf& data, multi_ex& ec_seq) if (ec_seq.empty()) isTest = true; else - isTest = COST_SENSITIVE::cs_label.test_label(&ec_seq[0]->l); + isTest = COST_SENSITIVE::cs_label.test_label(ec_seq[0]->l); for (const auto& ec : ec_seq) { // Each sub-example must have just one cost - assert(ec->l.cs.costs.size() == 1); + assert(ec->l.cs().costs.size() == 1); - if (COST_SENSITIVE::cs_label.test_label(&ec->l) != isTest) + if (COST_SENSITIVE::cs_label.test_label(ec->l) != isTest) { isTest = true; data.all->trace_message << "warning: ldf example has mix of train/test data; assuming test" << std::endl; @@ -289,18 +299,17 @@ void do_actual_learning_wap(ldf& data, single_learner& base, multi_ex& ec_seq) { size_t K = ec_seq.size(); std::vector all_costs; - for (const auto& example : ec_seq) all_costs.push_back(&example->l.cs.costs[0]); + for (const auto& example : ec_seq) all_costs.push_back(&example->l.cs().costs[0]); compute_wap_values(all_costs); for (size_t k1 = 0; k1 < K; k1++) { example* ec1 = ec_seq[k1]; - // save original variables - COST_SENSITIVE::label save_cs_label = ec1->l.cs; - label_data& simple_label = ec1->l.simple; + // Save original label. + COST_SENSITIVE::label save_cs_label(std::move(ec1->l.cs())); - v_array costs1 = save_cs_label.costs; + auto& costs1 = save_cs_label.costs; if (costs1[0].class_index == (uint32_t)-1) continue; @@ -309,7 +318,7 @@ void do_actual_learning_wap(ldf& data, single_learner& base, multi_ex& ec_seq) for (size_t k2 = k1 + 1; k2 < K; k2++) { example* ec2 = ec_seq[k2]; - v_array costs2 = ec2->l.cs.costs; + auto& costs2 = ec2->l.cs().costs; if (costs2[0].class_index == (uint32_t)-1) continue; @@ -321,8 +330,10 @@ void do_actual_learning_wap(ldf& data, single_learner& base, multi_ex& ec_seq) LabelDict::add_example_namespace_from_memory(data.label_features, *ec2, costs2[0].class_index); // learn - simple_label.initial = 0.; + ec1->l.reset(); + label_data& simple_label = ec1->l.init_as_simple(); simple_label.label = (costs1[0].x < costs2[0].x) ? -1.0f : 1.0f; + float old_weight = ec1->weight; ec1->weight = value_diff; ec1->partial_prediction = 0.; @@ -338,8 +349,9 @@ void do_actual_learning_wap(ldf& data, single_learner& base, multi_ex& ec_seq) } LabelDict::del_example_namespace_from_memory(data.label_features, *ec1, costs1[0].class_index); - // restore original cost-sensitive label, sum of importance weights - ec1->l.cs = save_cs_label; + // Restore original cost-sensitive label, sum of importance weights. + ec1->l.reset(); + ec1->l.init_as_cs(std::move(save_cs_label)); // TODO: What about partial_prediction? See do_actual_learning_oaa. 
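// ---------------------------------------------------------------------------
// do_actual_learning_wap above (and do_actual_learning_oaa below) now share a
// single idiom for calling into the scalar base: move the cost-sensitive label
// aside, re-tag the slot as a simple label, learn, then move the original
// label back. A minimal sketch using only calls that appear in this diff
// (the binary target value is a stand-in):
//
//   COST_SENSITIVE::label saved = std::move(ec->l.cs());  // take ownership
//   ec->l.reset();
//   label_data& simple = ec->l.init_as_simple();  // slot now carries a simple label
//   simple.label = 1.f;                           // stand-in target for the base learner
//   base.learn(*ec);
//   ec->l.reset();                                // drop the temporary simple label
//   ec->l.init_as_cs(std::move(saved));           // restore the CS label
// ---------------------------------------------------------------------------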
} } @@ -351,7 +363,7 @@ void do_actual_learning_oaa(ldf& data, single_learner& base, multi_ex& ec_seq) for (const auto& example : ec_seq) { - float ec_cost = example->l.cs.costs[0].x; + float ec_cost = example->l.cs().costs[0].x; if (ec_cost < min_cost) min_cost = ec_cost; if (ec_cost > max_cost) @@ -361,7 +373,7 @@ void do_actual_learning_oaa(ldf& data, single_learner& base, multi_ex& ec_seq) for (const auto& ec : ec_seq) { // save original variables - label save_cs_label = ec->l.cs; + label save_cs_label = std::move(ec->l.cs()); const auto& costs = save_cs_label.costs; // build example for the base learner @@ -384,7 +396,8 @@ void do_actual_learning_oaa(ldf& data, single_learner& base, multi_ex& ec_seq) ec->weight = old_weight * (costs[0].x - min_cost); } } - ec->l.simple = simple_label; + ec->l.reset(); + ec->l.init_as_simple(simple_label); // learn LabelDict::add_example_namespace_from_memory(data.label_features, *ec, costs[0].class_index); @@ -396,8 +409,9 @@ void do_actual_learning_oaa(ldf& data, single_learner& base, multi_ex& ec_seq) ec->weight = old_weight; // restore original cost-sensitive label, sum of importance weights and partial_prediction - ec->l.cs = save_cs_label; ec->partial_prediction = costs[0].partial_prediction; + ec->l.reset(); + ec->l.init_as_cs(std::move(save_cs_label)); } } @@ -446,7 +460,7 @@ void do_actual_learning(ldf& data, single_learner& base, multi_ex& ec_seq_all) for (uint32_t k = 0; k < K; k++) { example* ec = ec_seq[k]; - data.stored_preds.push_back(ec->pred.a_s); + data.stored_preds.push_back(std::move(ec->pred.action_scores())); make_single_prediction(data, base, *ec); action_score s; s.score = ec->partial_prediction; @@ -480,45 +494,47 @@ void do_actual_learning(ldf& data, single_learner& base, multi_ex& ec_seq_all) do_actual_learning_oaa(data, base, ec_seq); } - if (data.rank) + // Clear the existing prediction + for (auto& example : ec_seq) { - data.stored_preds[0].clear(); - for (size_t k = 0; k < K; k++) - { - ec_seq[k]->pred.a_s = data.stored_preds[k]; - ec_seq[0]->pred.a_s.push_back(data.a_s[k]); - } + example->pred.reset(); } - else + + // Set the prediction. 
+ if (data.rank) { - // Mark the predicted subexample with its class_index, all other with 0 + data.stored_preds[0].clear(); for (size_t k = 0; k < K; k++) { - if (k == predicted_K) - ec_seq[k]->pred.multiclass = ec_seq[k]->l.cs.costs[0].class_index; - else - ec_seq[k]->pred.multiclass = 0; + ec_seq[k]->pred.init_as_action_scores() = std::move(data.stored_preds[k]); + ec_seq[0]->pred.action_scores().push_back(data.a_s[k]); } } - - ////////////////////// compute probabilities - if (data.is_probabilities) + else if (data.is_probabilities) { float sum_prob = 0; - for (const auto& example : ec_seq) + for (auto& example : ec_seq) { // probability(correct_class) = 1 / (1+exp(-score)), where score is higher for better classes, // but partial_prediction is lower for better classes (we are predicting the cost), // so we need to take score = -partial_prediction, // thus probability(correct_class) = 1 / (1+exp(-(-partial_prediction))) float prob = 1.f / (1.f + correctedExp(example->partial_prediction)); - example->pred.prob = prob; + example->pred.init_as_prob() = prob; sum_prob += prob; } // make sure that the probabilities sum up (exactly) to one - for (const auto& example : ec_seq) + for (auto& example : ec_seq) + { + example->pred.prob() /= sum_prob; + } + } + else + { + // Mark the predicted subexample with its class_index, all other with 0 + for (size_t k = 0; k < K; k++) { - example->pred.prob /= sum_prob; + ec_seq[k]->pred.init_as_multiclass() = k == predicted_K ? ec_seq[k]->l.cs().costs[0].class_index : 0; } } } @@ -538,8 +554,8 @@ void global_print_newline(vw& all) void output_example(vw& all, example& ec, bool& hit_loss, multi_ex* ec_seq, ldf& data) { - label& ld = ec.l.cs; - v_array costs = ld.costs; + label& ld = ec.l.cs(); + v_array& costs = ld.costs; if (example_is_newline(ec)) return; @@ -554,7 +570,7 @@ void output_example(vw& all, example& ec, bool& hit_loss, multi_ex* ec_seq, ldf& if (data.is_probabilities) { // predicted_K was already computed in do_actual_learning(), - // but we cannot store it in ec.pred union because we store ec.pred.prob there. + // but we cannot store it in ec.pred union because we store ec.pred.prob() there. // So we must compute it again. uint32_t predicted_K = 0; float min_score = FLT_MAX; @@ -567,12 +583,12 @@ void output_example(vw& all, example& ec, bool& hit_loss, multi_ex* ec_seq, ldf& predicted_K = (uint32_t)k; } } - predicted_class = (*ec_seq)[predicted_K]->l.cs.costs[0].class_index; + predicted_class = (*ec_seq)[predicted_K]->l.cs().costs[0].class_index; } else - predicted_class = ec.pred.multiclass; + predicted_class = ec.pred.multiclass(); - if (!COST_SENSITIVE::cs_label.test_label(&ec.l)) + if (!COST_SENSITIVE::cs_label.test_label(ec.l)) { for (auto const& cost : costs) { @@ -590,7 +606,7 @@ void output_example(vw& all, example& ec, bool& hit_loss, multi_ex* ec_seq, ldf& } for (int sink : all.final_prediction_sink) - all.print_by_ref(sink, data.is_probabilities ? ec.pred.prob : (float)ec.pred.multiclass, 0, ec.tag); + all.print_by_ref(sink, data.is_probabilities ? 
ec.pred.prob() : (float)ec.pred.multiclass(), 0, ec.tag); if (all.raw_prediction > 0) { @@ -606,12 +622,12 @@ void output_example(vw& all, example& ec, bool& hit_loss, multi_ex* ec_seq, ldf& all.print_text_by_ref(all.raw_prediction, outputStringStream.str(), ec.tag); } - COST_SENSITIVE::print_update(all, COST_SENSITIVE::cs_label.test_label(&ec.l), ec, ec_seq, false, predicted_class); + COST_SENSITIVE::print_update(all, COST_SENSITIVE::cs_label.test_label(ec.l), ec, ec_seq, false, predicted_class); } void output_rank_example(vw& all, example& head_ec, bool& hit_loss, multi_ex* ec_seq) { - const auto& costs = head_ec.l.cs.costs; + const auto& costs = head_ec.l.cs().costs; if (example_is_newline(head_ec)) return; @@ -621,9 +637,9 @@ void output_rank_example(vw& all, example& head_ec, bool& hit_loss, multi_ex* ec all.sd->total_features += head_ec.num_features; float loss = 0.; - v_array& preds = head_ec.pred.a_s; + v_array& preds = head_ec.pred.action_scores(); - if (!COST_SENSITIVE::cs_label.test_label(&head_ec.l)) + if (!COST_SENSITIVE::cs_label.test_label(head_ec.l)) { size_t idx = 0; for (example* ex : *ec_seq) @@ -632,7 +648,7 @@ void output_rank_example(vw& all, example& head_ec, bool& hit_loss, multi_ex* ec break; if (preds[0].action == idx) { - loss = ex->l.cs.costs[0].x; + loss = ex->l.cs().costs[0].x; hit_loss = true; } idx++; @@ -642,7 +658,7 @@ void output_rank_example(vw& all, example& head_ec, bool& hit_loss, multi_ex* ec assert(loss >= 0); } - for (int sink : all.final_prediction_sink) print_action_score(sink, head_ec.pred.a_s, head_ec.tag); + for (int sink : all.final_prediction_sink) print_action_score(sink, head_ec.pred.action_scores(), head_ec.tag); if (all.raw_prediction > 0) { @@ -658,7 +674,7 @@ void output_rank_example(vw& all, example& head_ec, bool& hit_loss, multi_ex* ec all.print_text_by_ref(all.raw_prediction, outputStringStream.str(), head_ec.tag); } - COST_SENSITIVE::print_update(all, COST_SENSITIVE::cs_label.test_label(&head_ec.l), head_ec, ec_seq, true, 0); + COST_SENSITIVE::print_update(all, COST_SENSITIVE::cs_label.test_label(head_ec.l), head_ec, ec_seq, true, 0); } void output_example_seq(vw& all, ldf& data, multi_ex& ec_seq) @@ -680,7 +696,7 @@ void output_example_seq(vw& all, ldf& data, multi_ex& ec_seq) if (all.raw_prediction > 0) { - v_array empty = {nullptr, nullptr, nullptr, 0}; + v_array empty; all.print_text_by_ref(all.raw_prediction, "", empty); } @@ -691,7 +707,7 @@ void output_example_seq(vw& all, ldf& data, multi_ex& ec_seq) for (size_t k = 0; k < K; k++) { - float ec_cost = ec_seq[k]->l.cs.costs[0].x; + float ec_cost = ec_seq[k]->l.cs().costs[0].x; if (ec_cost < min_cost) { min_cost = ec_cost; @@ -700,7 +716,7 @@ void output_example_seq(vw& all, ldf& data, multi_ex& ec_seq) } float multiclass_log_loss = 999; // -log(0) = plus infinity - float correct_class_prob = ec_seq[correct_class_k]->pred.prob; + float correct_class_prob = ec_seq[correct_class_k]->pred.prob(); if (correct_class_prob > 0) multiclass_log_loss = -log(correct_class_prob); @@ -736,8 +752,8 @@ void finish_multiline_example(vw& all, ldf& data, multi_ex& ec_seq) */ void inline process_label(ldf& data, example* ec) { - //auto new_fs = ec->feature_space[ec->indices[0]]; - auto& costs = ec->l.cs.costs; + // auto new_fs = ec->feature_space[ec->indices[0]]; + auto& costs = ec->l.cs().costs; for (auto const& cost : costs) { const auto lab = (size_t)cost.x; @@ -829,11 +845,8 @@ base_learner* csldf_setup(options_i& options, vw& all) } if (options.was_supplied("ldf_override")) ldf_arg = 
ldf_override; - if (ld->rank) - all.delete_prediction = delete_action_scores; all.p->lp = COST_SENSITIVE::cs_label; - all.label_type = label_type_t::cs; ld->treat_as_classifier = false; if (ldf_arg == "multiline" || ldf_arg == "m") @@ -866,6 +879,9 @@ base_learner* csldf_setup(options_i& options, vw& all) ld->label_features.reserve(256); prediction_type_t pred_type; + if (ld->rank && ld->is_probabilities) + THROW("Cannot specify both csoaa_rank and probabilities at the same time."); + if (ld->rank) pred_type = prediction_type_t::action_scores; else if (ld->is_probabilities) @@ -878,6 +894,7 @@ base_learner* csldf_setup(options_i& options, vw& all) do_actual_learning, 1, pred_type); l.set_finish_example(finish_multiline_example); l.set_end_pass(end_pass); + l.label_type = label_type_t::cs; all.cost_sensitive = make_base(l); return all.cost_sensitive; } diff --git a/vowpalwabbit/csoaa.h b/vowpalwabbit/csoaa.h index 8ef69503045..078477591d6 100644 --- a/vowpalwabbit/csoaa.h +++ b/vowpalwabbit/csoaa.h @@ -11,4 +11,5 @@ LEARNER::base_learner* csoaa_setup(VW::config::options_i& options, vw& all); LEARNER::base_learner* csldf_setup(VW::config::options_i& options, vw& all); struct csoaa; void finish_example(vw& all, csoaa&, example& ec); +void finish_example(vw& all, example& ec); } // namespace CSOAA diff --git a/vowpalwabbit/ect.cc b/vowpalwabbit/ect.cc index cbfc786b36a..dcd702f25b2 100644 --- a/vowpalwabbit/ect.cc +++ b/vowpalwabbit/ect.cc @@ -47,24 +47,9 @@ struct ect uint32_t last_pair; v_array tournaments_won; - - ~ect() - { - for (auto& all_level : all_levels) - { - for (auto& t : all_level) t.delete_v(); - all_level.delete_v(); - } - all_levels.delete_v(); - final_nodes.delete_v(); - up_directions.delete_v(); - directions.delete_v(); - down_directions.delete_v(); - tournaments_won.delete_v(); - } }; -bool exists(v_array db) +bool exists(const v_array& db) { for (unsigned long i : db) if (i != 0) @@ -104,8 +89,8 @@ size_t create_circuit(ect& e, uint64_t max_label, uint64_t eliminations) if (max_label == 1) return 0; - v_array> tournaments = v_init>(); - v_array t = v_init(); + v_array> tournaments; + v_array t; for (uint32_t i = 0; i < max_label; i++) { @@ -114,11 +99,11 @@ size_t create_circuit(ect& e, uint64_t max_label, uint64_t eliminations) e.directions.push_back(d); } - tournaments.push_back(t); + tournaments.push_back(std::move(t)); for (size_t i = 0; i < eliminations - 1; i++) tournaments.push_back(v_array()); - e.all_levels.push_back(tournaments); + e.all_levels.push_back(std::move(tournaments)); size_t level = 0; @@ -126,22 +111,21 @@ size_t create_circuit(ect& e, uint64_t max_label, uint64_t eliminations) while (not_empty(e.all_levels[level])) { - v_array> new_tournaments = v_init>(); - tournaments = e.all_levels[level]; + v_array> new_tournaments; + auto& current_tournaments = e.all_levels[level]; - for (size_t t = 0; t < tournaments.size(); t++) + for (size_t t = 0; t < current_tournaments.size(); t++) { - v_array empty = v_init(); - new_tournaments.push_back(empty); + new_tournaments.push_back(v_array()); } - for (size_t t = 0; t < tournaments.size(); t++) + for (size_t t = 0; t < current_tournaments.size(); t++) { - for (size_t j = 0; j < tournaments[t].size() / 2; j++) + for (size_t j = 0; j < current_tournaments[t].size() / 2; j++) { uint32_t id = node++; - uint32_t left = tournaments[t][2 * j]; - uint32_t right = tournaments[t][2 * j + 1]; + uint32_t left = current_tournaments[t][2 * j]; + uint32_t right = current_tournaments[t][2 * j + 1]; direction d = {id, t, 0, 0, 
left, right, false}; e.directions.push_back(d); @@ -157,10 +141,10 @@ size_t create_circuit(ect& e, uint64_t max_label, uint64_t eliminations) if (e.directions[left].last) e.directions[left].winner = direction_index; - if (tournaments[t].size() == 2 && (t == 0 || tournaments[t - 1].empty())) + if (current_tournaments[t].size() == 2 && (t == 0 || current_tournaments[t - 1].empty())) { e.directions[direction_index].last = true; - if (t + 1 < tournaments.size()) + if (t + 1 < current_tournaments.size()) new_tournaments[t + 1].push_back(id); else // winner eliminated. e.directions[direction_index].winner = 0; @@ -168,15 +152,15 @@ size_t create_circuit(ect& e, uint64_t max_label, uint64_t eliminations) } else new_tournaments[t].push_back(id); - if (t + 1 < tournaments.size()) + if (t + 1 < current_tournaments.size()) new_tournaments[t + 1].push_back(id); else // loser eliminated. e.directions[direction_index].loser = 0; } - if (tournaments[t].size() % 2 == 1) - new_tournaments[t].push_back(tournaments[t].last()); + if (current_tournaments[t].size() % 2 == 1) + new_tournaments[t].push_back(current_tournaments[t].last()); } - e.all_levels.push_back(new_tournaments); + e.all_levels.push_back(std::move(new_tournaments)); level++; } @@ -196,7 +180,10 @@ uint32_t ect_predict(ect& e, single_learner& base, example& ec) uint32_t finals_winner = 0; // Binary final elimination tournament first - ec.l.simple = {FLT_MAX, 0., 0.}; + ec.l.reset(); + ec.l.init_as_simple(FLT_MAX, 0.f, 0.f); + ec.pred.reset(); + ec.pred.init_as_scalar(); for (size_t i = e.tree_height - 1; i != (size_t)0 - 1; i--) { @@ -207,7 +194,7 @@ uint32_t ect_predict(ect& e, single_learner& base, example& ec) base.learn(ec, problem_number); - if (ec.pred.scalar > e.class_boundary) + if (ec.pred.scalar() > e.class_boundary) finals_winner = finals_winner | (((size_t)1) << i); } } @@ -217,7 +204,7 @@ uint32_t ect_predict(ect& e, single_learner& base, example& ec) { base.learn(ec, id - e.k); - if (ec.pred.scalar > e.class_boundary) + if (ec.pred.scalar() > e.class_boundary) id = e.directions[id].right; else id = e.directions[id].left; @@ -229,7 +216,7 @@ void ect_train(ect& e, single_learner& base, example& ec) { if (e.k == 1) // nothing to do return; - MULTICLASS::label_t mc = ec.l.multi; + MULTICLASS::label_t mc = ec.l.multi(); label_data simple_temp; @@ -246,14 +233,17 @@ void ect_train(ect& e, single_learner& base, example& ec) else simple_temp.label = 1; - ec.l.simple = simple_temp; + ec.l.reset(); + ec.l.init_as_simple(simple_temp); + ec.pred.reset(); + ec.pred.init_as_scalar(); base.learn(ec, id - e.k); float old_weight = ec.weight; ec.weight = 0.; base.learn(ec, id - e.k); // inefficient, we should extract final prediction exactly. 
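// ---------------------------------------------------------------------------
// ect applies the same discipline to the prediction slot: it is tagged as
// scalar before the base learner runs, and re-tagged as multiclass when the
// reduction reports its own result. A sketch assuming the accessors used
// above (problem_number and winner are stand-ins):
//
//   ec.pred.reset();
//   ec.pred.init_as_scalar();               // the base learner writes a float here
//   base.learn(ec, problem_number);
//   bool went_right = ec.pred.scalar() > e.class_boundary;
//   // ... tournament bookkeeping decides the winning label ...
//   ec.pred.reset();
//   ec.pred.init_as_multiclass() = winner;  // this reduction outputs a class index
// ---------------------------------------------------------------------------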
ec.weight = old_weight; - bool won = (ec.pred.scalar - e.class_boundary) * simple_temp.label > 0; + bool won = (ec.pred.scalar() - e.class_boundary) * simple_temp.label > 0; if (won) { @@ -296,13 +286,14 @@ void ect_train(ect& e, single_learner& base, example& ec) else simple_temp.label = 1; simple_temp.weight = (float)(1 << (e.tree_height - i - 1)); - ec.l.simple = simple_temp; + ec.l.reset(); + ec.l.init_as_simple(simple_temp); uint32_t problem_number = e.last_pair + j * (1 << (i + 1)) + (1 << i) - 1; base.learn(ec, problem_number); - if (ec.pred.scalar > e.class_boundary) + if (ec.pred.scalar() > e.class_boundary) e.tournaments_won[j] = right; else e.tournaments_won[j] = left; @@ -316,23 +307,32 @@ void ect_train(ect& e, single_learner& base, example& ec) void predict(ect& e, single_learner& base, example& ec) { - MULTICLASS::label_t mc = ec.l.multi; + MULTICLASS::label_t mc = ec.l.multi(); if (mc.label == 0 || (mc.label > e.k && mc.label != (uint32_t)-1)) std::cout << "label " << mc.label << " is not in {1," << e.k << "} This won't work right." << std::endl; - ec.pred.multiclass = ect_predict(e, base, ec); - ec.l.multi = mc; + + auto pred = ect_predict(e, base, ec); + ec.pred.reset(); + ec.pred.init_as_multiclass() = pred; + + ec.l.reset(); + ec.l.init_as_multi(mc); } void learn(ect& e, single_learner& base, example& ec) { - MULTICLASS::label_t mc = ec.l.multi; + MULTICLASS::label_t mc = ec.l.multi(); predict(e, base, ec); - uint32_t pred = ec.pred.multiclass; + uint32_t pred = ec.pred.multiclass(); if (mc.label != (uint32_t)-1) ect_train(e, base, ec); - ec.l.multi = mc; - ec.pred.multiclass = pred; + + ec.l.reset(); + ec.l.init_as_multi(mc); + + ec.pred.reset(); + ec.pred.init_as_multiclass() = pred; } base_learner* ect_setup(options_i& options, vw& all) @@ -359,6 +359,6 @@ base_learner* ect_setup(options_i& options, vw& all) data->class_boundary = 0.5; // as --link=logistic maps predictions in [0;1] learner& l = init_multiclass_learner(data, as_singleline(base), learn, predict, all.p, wpp); - + l.label_type = label_type_t::multi; return make_base(l); } diff --git a/vowpalwabbit/example.cc b/vowpalwabbit/example.cc index a66e8783ada..9db124e6675 100644 --- a/vowpalwabbit/example.cc +++ b/vowpalwabbit/example.cc @@ -35,17 +35,16 @@ float collision_cleanup(features& fs) namespace VW { -void copy_example_label(example* dst, example* src, size_t, void (*copy_label)(void*, void*)) + +VW_DEPRECATED("Copy the label object directly.") +void copy_example_label(example* dst, example* src, size_t, void (* /*copy_label*/)(polylabel&, polylabel&)) { - if (copy_label) - copy_label(&dst->l, &src->l); // TODO: we really need to delete_label on dst :( - else - dst->l = src->l; + dst->l = src->l; } void copy_example_metadata(bool /* audit */, example* dst, example* src) { - copy_array(dst->tag, src->tag); + dst->tag = src->tag; dst->example_counter = src->example_counter; dst->ft_offset = src->ft_offset; @@ -55,8 +54,7 @@ void copy_example_metadata(bool /* audit */, example* dst, example* src) dst->passthrough = nullptr; else { - dst->passthrough = new features; - dst->passthrough->deep_copy_from(*src->passthrough); + dst->passthrough = new features(*src->passthrough); } dst->loss = src->loss; dst->weight = src->weight; @@ -72,18 +70,25 @@ void copy_example_data(bool audit, example* dst, example* src) copy_example_metadata(audit, dst, src); // copy feature data - copy_array(dst->indices, src->indices); - for (namespace_index c : src->indices) dst->feature_space[c].deep_copy_from(src->feature_space[c]); 
+ dst->indices = src->indices; + for (namespace_index c : src->indices) + { + // Performs deep copy of namespace + dst->feature_space[c] = src->feature_space[c]; + } // copy_array(dst->atomics[i], src->atomics[i]); dst->num_features = src->num_features; dst->total_sum_feat_sq = src->total_sum_feat_sq; + + // Shallow copy dst->interactions = src->interactions; } -void copy_example_data(bool audit, example* dst, example* src, size_t label_size, void (*copy_label)(void*, void*)) +void copy_example_data( + bool audit, example* dst, example* src, size_t /*label_size*/, void (* /*copy_label*/)(polylabel&, polylabel&)) { copy_example_data(audit, dst, src); - copy_example_label(dst, src, label_size, copy_label); + dst->l = src->l; } void move_feature_namespace(example* dst, example* src, namespace_index c) @@ -124,7 +129,6 @@ feature* get_features(vw& all, example* ec, size_t& feature_map_len) features_and_source fs; fs.stride_shift = all.weights.stride_shift(); fs.mask = (uint64_t)all.weights.mask() >> all.weights.stride_shift(); - fs.feature_map = v_init(); GD::foreach_feature(all, *ec, fs); feature_map_len = fs.feature_map.size(); @@ -150,7 +154,7 @@ flat_example* flatten_example(vw& all, example* ec) { flat_example& fec = calloc_or_throw(); fec.l = ec->l; - fec.l.simple.weight = ec->weight; + fec.l.simple().weight = ec->weight; fec.tag_len = ec->tag.size(); if (fec.tag_len > 0) @@ -184,50 +188,37 @@ flat_example* flatten_sort_example(vw& all, example* ec) return fec; } +VW_DEPRECATED("") void free_flatten_example(flat_example* fec) { - // note: The label memory should be freed by by freeing the original example. if (fec) { - fec->fs.~features(); - if (fec->tag_len > 0) - free(fec->tag); - free(fec); + fec->~flat_example(); } } namespace VW { -example* alloc_examples(size_t, size_t count = 1) +example* alloc_examples(size_t count = 1) { example* ec = calloc_or_throw(count); if (ec == nullptr) return nullptr; for (size_t i = 0; i < count; i++) { - ec[i].ft_offset = 0; - // std::cerr << " alloc_example.indices.begin()=" << ec->indices.begin() << " end=" << ec->indices.end() << " // - // ld = " << ec->ld << "\t|| me = " << ec << std::endl; + new (&ec[i]) example(); } return ec; } -void dealloc_example(void (*delete_label)(void*), example& ec, void (*delete_prediction)(void*)) +example* alloc_examples(size_t, size_t count) { - if (delete_label) - delete_label(&ec.l); - - if (delete_prediction) - delete_prediction(&ec.pred); - - ec.tag.delete_v(); - - if (ec.passthrough) - { - delete ec.passthrough; - } + return alloc_examples(count); +} - ec.indices.delete_v(); +VW_DEPRECATED("You can just use the example destructor when deallocating now") +void dealloc_example(void (* /*delete_label*/)(polylabel&), example& ec, void (* /*delete_prediction*/)(void*)) +{ ec.~example(); } diff --git a/vowpalwabbit/example.h b/vowpalwabbit/example.h index e8573516c5c..08593f68acc 100644 --- a/vowpalwabbit/example.h +++ b/vowpalwabbit/example.h @@ -19,35 +19,9 @@ #include "conditional_contextual_bandit.h" #include "ccb_label.h" #include - -typedef union -{ - no_label::no_label empty; - label_data simple; - MULTICLASS::label_t multi; - COST_SENSITIVE::label cs; - CB::label cb; - CCB::label conditional_contextual_bandit; - CB_EVAL::label cb_eval; - MULTILABEL::labels multilabels; -} polylabel; - -inline void delete_scalars(void* v) -{ - v_array* preds = (v_array*)v; - preds->delete_v(); -} - -typedef union -{ - float scalar; - v_array scalars; // a sequence of scalar predictions - ACTION_SCORE::action_scores a_s; // a 
sequence of classes with scores. Also used for probabilities.
-  CCB::decision_scores_t decision_scores;
-  uint32_t multiclass;
-  MULTILABEL::labels multilabels;
-  float prob;  // for --probabilities --csoaa_ldf=mc
-} polyprediction;
+#include "vw_exception.h"
+#include "label.h"
+#include "prediction.h"

 IGNORE_DEPRECATED_USAGE_START
 struct example : public example_predict  // core example datatype.
@@ -74,10 +48,17 @@ struct example : public example_predict  // core example datatype.
   bool test_only;
   bool end_pass;  // special example indicating end of pass.
-  bool sorted;    // Are the features sorted or not?
-
+  bool sorted;  // Are the features sorted or not?
   VW_DEPRECATED("in_use has been removed, examples taken from the pool are assumed to be in use if there is a reference to them. Standalone examples are by definition always in use.")
   bool in_use = true;
+
+  ~example()
+  {
+    if (passthrough)
+    {
+      delete passthrough;
+    }
+  }
 };
 IGNORE_DEPRECATED_USAGE_END

@@ -88,7 +69,7 @@ struct flat_example
   polylabel l;

   size_t tag_len;
-  char* tag;  // An identifier for the example.
+  char* tag = nullptr;  // An identifier for the example.

   size_t example_counter;
   uint64_t ft_offset;
@@ -97,6 +78,81 @@ struct flat_example
   size_t num_features;      // precomputed, cause it's fast&easy.
   float total_sum_feat_sq;  // precomputed, cause it's kind of fast & easy.
   features fs;              // all the features
+
+  ~flat_example()
+  {
+    if (tag_len > 0)
+      free(tag);
+  }
+
+  flat_example(const flat_example& other)
+  {
+    l = other.l;
+    tag_len = other.tag_len;
+    if (tag_len > 0)
+    {
+      // Allocate before copying; the source buffer is owned by other.
+      tag = (char*)malloc(tag_len);
+      memcpy(tag, other.tag, tag_len);
+    }
+    example_counter = other.example_counter;
+    ft_offset = other.ft_offset;
+    global_weight = other.global_weight;
+    num_features = other.num_features;
+    total_sum_feat_sq = other.total_sum_feat_sq;
+    fs = other.fs;
+  }
+
+  flat_example& operator=(const flat_example& other)
+  {
+    if (this == &other)
+      return *this;
+    l = other.l;
+    tag_len = other.tag_len;
+    if (tag != nullptr)
+    {
+      free(tag);
+      tag = nullptr;
+    }
+    if (tag_len > 0)
+    {
+      tag = (char*)malloc(tag_len);
+      memcpy(tag, other.tag, tag_len);
+    }
+    example_counter = other.example_counter;
+    ft_offset = other.ft_offset;
+    global_weight = other.global_weight;
+    num_features = other.num_features;
+    total_sum_feat_sq = other.total_sum_feat_sq;
+    fs = other.fs;
+    return *this;
+  }
+
+  flat_example(flat_example&& other)
+  {
+    l = std::move(other.l);
+    tag_len = other.tag_len;
+    tag = other.tag;
+    // Leave other destructible; otherwise both objects would free the same buffer.
+    other.tag = nullptr;
+    other.tag_len = 0;
+    example_counter = other.example_counter;
+    ft_offset = other.ft_offset;
+    global_weight = other.global_weight;
+    num_features = other.num_features;
+    total_sum_feat_sq = other.total_sum_feat_sq;
+    fs = std::move(other.fs);
+  }
+
+  flat_example& operator=(flat_example&& other)
+  {
+    if (this == &other)
+      return *this;
+    l = std::move(other.l);
+    if (tag != nullptr)
+    {
+      free(tag);
+    }
+    tag_len = other.tag_len;
+    tag = other.tag;
+    other.tag = nullptr;
+    other.tag_len = 0;
+    example_counter = other.example_counter;
+    ft_offset = other.ft_offset;
+    global_weight = other.global_weight;
+    num_features = other.num_features;
+    total_sum_feat_sq = other.total_sum_feat_sq;
+    fs = std::move(other.fs);
+    return *this;
+  }
 };

 flat_example* flatten_example(vw& all, example* ec);
diff --git a/vowpalwabbit/example_predict.cc b/vowpalwabbit/example_predict.cc
index 0031a46b4e0..69853cb05c0 100644
--- a/vowpalwabbit/example_predict.cc
+++ b/vowpalwabbit/example_predict.cc
@@ -6,14 +6,10 @@
 safe_example_predict::safe_example_predict()
 {
-  indices = v_init<namespace_index>();
-  ft_offset = 0;
-  // feature_space is initialized through constructors
 }

 safe_example_predict::~safe_example_predict()
 {
-  indices.delete_v();
 }

 void safe_example_predict::clear()
diff 
--git a/vowpalwabbit/example_predict.h b/vowpalwabbit/example_predict.h index d167127bad1..2fbf5584174 100644 --- a/vowpalwabbit/example_predict.h +++ b/vowpalwabbit/example_predict.h @@ -37,7 +37,7 @@ struct example_predict v_array indices; std::array feature_space; // Groups of feature values. - uint64_t ft_offset; // An offset for all feature values. + uint64_t ft_offset = 0; // An offset for all feature values. // Interactions are specified by this vector of strings, where each string is an interaction and each char is a // namespace. @@ -48,7 +48,9 @@ struct example_predict }; // make sure we have an exception safe version of example_predict -class safe_example_predict : public example_predict +class +VW_DEPRECATED("example now uses C++ lifecycle functions. Please migrate to that instead for RAII needs.") +safe_example_predict : public example_predict { public: safe_example_predict(); diff --git a/vowpalwabbit/explore_eval.cc b/vowpalwabbit/explore_eval.cc index d03e3def73f..6bbf4a73d78 100644 --- a/vowpalwabbit/explore_eval.cc +++ b/vowpalwabbit/explore_eval.cc @@ -61,7 +61,7 @@ void output_example(vw& all, explore_eval& c, example& ec, multi_ex* ec_seq) size_t num_features = 0; float loss = 0.; - ACTION_SCORE::action_scores preds = (*ec_seq)[0]->pred.a_s; + const auto& preds = (*ec_seq)[0]->pred.action_probs(); for (size_t i = 0; i < (*ec_seq).size(); i++) if (!CB::ec_is_example_header(*(*ec_seq)[i])) @@ -84,13 +84,13 @@ void output_example(vw& all, explore_eval& c, example& ec, multi_ex* ec_seq) all.sd->update(holdout_example, labeled_example, loss, ec.weight, num_features); - for (int sink : all.final_prediction_sink) print_action_score(sink, ec.pred.a_s, ec.tag); + for (int sink : all.final_prediction_sink) print_action_score(sink, ec.pred.action_probs(), ec.tag); if (all.raw_prediction > 0) { std::string outputString; std::stringstream outputStringStream(outputString); - const auto& costs = ec.l.cb.costs; + const auto& costs = ec.l.cb().costs; for (size_t i = 0; i < costs.size(); i++) { @@ -131,18 +131,18 @@ void do_actual_learning(explore_eval& data, multi_learner& base, multi_ex& ec_se if (label_example != nullptr) // extract label { - data.action_label = label_example->l.cb; - label_example->l.cb = data.empty_label; + data.action_label = label_example->l.cb(); + label_example->l.cb() = data.empty_label; } multiline_learn_or_predict(base, ec_seq, data.offset); if (label_example != nullptr) // restore label - label_example->l.cb = data.action_label; + label_example->l.cb() = data.action_label; data.known_cost = CB_ADF::get_observed_cost(ec_seq); if (label_example != nullptr && is_learn) { - ACTION_SCORE::action_scores& a_s = ec_seq[0]->pred.a_s; + auto& a_s = ec_seq[0]->pred.action_probs(); float action_probability = 0; for (size_t i = 0; i < a_s.size(); i++) @@ -164,12 +164,12 @@ void do_actual_learning(explore_eval& data, multi_learner& base, multi_ex& ec_se example* ec_found = nullptr; for (example*& ec : ec_seq) { - if (ec->l.cb.costs.size() == 1 && ec->l.cb.costs[0].cost != FLT_MAX && ec->l.cb.costs[0].probability > 0) + if (ec->l.cb().costs.size() == 1 && ec->l.cb().costs[0].cost != FLT_MAX && ec->l.cb().costs[0].probability > 0) ec_found = ec; if (threshold > 1) ec->weight *= threshold; } - ec_found->l.cb.costs[0].probability = action_probability; + ec_found->l.cb().costs[0].probability = action_probability; multiline_learn_or_predict(base, ec_seq, data.offset); @@ -178,7 +178,7 @@ void do_actual_learning(explore_eval& data, multi_learner& base, multi_ex& ec_se float 
inv_threshold = 1.f / threshold; for (auto& ec : ec_seq) ec->weight *= inv_threshold; } - ec_found->l.cb.costs[0].probability = data.known_cost.probability; + ec_found->l.cb().costs[0].probability = data.known_cost.probability; data.update_count++; } } @@ -211,16 +211,14 @@ base_learner* explore_eval_setup(options_i& options, vw& all) if (!options.was_supplied("cb_explore_adf")) options.insert("cb_explore_adf", ""); - all.delete_prediction = nullptr; - multi_learner* base = as_multiline(setup_base(options, all)); all.p->lp = CB::cb_label; - all.label_type = label_type_t::cb; learner& l = init_learner(data, base, do_actual_learning, do_actual_learning, 1, prediction_type_t::action_probs); l.set_finish_example(finish_multiline_example); l.set_finish(finish); + l.label_type = label_type_t::cb; return make_base(l); } diff --git a/vowpalwabbit/expreplay.h b/vowpalwabbit/expreplay.h index 78ed788bf23..eb55d40bfb5 100644 --- a/vowpalwabbit/expreplay.h +++ b/vowpalwabbit/expreplay.h @@ -8,6 +8,7 @@ #include "parse_args.h" #include "rand48.h" #include +#include namespace ExpReplay { @@ -16,23 +17,13 @@ struct expreplay { vw* all; std::shared_ptr _random_state; - size_t N; // how big is the buffer? - example* buf; // the deep copies of examples (N of them) - bool* filled; // which of buf[] is filled + size_t N; // how big is the buffer? + std::vector buf; // the deep copies of examples (N of them) + + std::vector filled; // which of buf[] is filled size_t replay_count; // each time er.learn() is called, how many times do we call base.learn()? default=1 (in which // case we're just permuting) LEARNER::single_learner* base; - - ~expreplay() - { - for (size_t n = 0; n < N; n++) - { - lp.delete_label(&buf[n].l); - VW::dealloc_example(NULL, buf[n], NULL); // TODO: need to free label - } - free(buf); - free(filled); - } }; template @@ -40,7 +31,7 @@ void predict_or_learn(expreplay& er, LEARNER::single_learner& base, example& { // regardless of what happens, we must predict base.predict(ec); // if we're not learning, that's all that has to happen - if (!is_learn || lp.get_weight(&ec.l) == 0.) + if (!is_learn || lp.get_weight(ec.l) == 0.) return; for (size_t replay = 1; replay < er.replay_count; replay++) @@ -56,10 +47,11 @@ void predict_or_learn(expreplay& er, LEARNER::single_learner& base, example& er.filled[n] = true; VW::copy_example_data(er.all->audit, &er.buf[n], &ec); // don't copy the label - if (lp.copy_label) - lp.copy_label(&er.buf[n].l, &ec.l); - else - er.buf[n].l = ec.l; + + // By copying these, we don't need to know the type and it can be generic. + er.buf[n].l = ec.l; + // Technically we don't need to copy here, but this allows us to set the type of pred correctly. + er.buf[n].pred = ec.pred; } template @@ -81,6 +73,7 @@ void end_pass(expreplay& er) } } +// TODO Only lp dependency is on weight - which should be able to be removed once weight is an example concept. 
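// ---------------------------------------------------------------------------
// Because polylabel and polyprediction are now self-contained, copyable types,
// expreplay needs neither the per-type copy_label hook nor a hand-written
// destructor: standard containers own the buffer, and plain assignment copies
// whichever label kind is present. A condensed sketch of the new buffer
// management, using only the calls that appear in this diff:
//
//   // in expreplay_setup: RAII buffer, no calloc/free pairing
//   er->buf.resize(er->N);            // std::vector<example> runs destructors for us
//   er->filled.resize(er->N, false);
//
//   // in predict_or_learn: store a deep copy of the incoming example in slot n
//   VW::copy_example_data(er.all->audit, &er.buf[n], &ec);  // features and metadata
//   er.buf[n].l = ec.l;               // generic copy; no copy_label needed
//   er.buf[n].pred = ec.pred;         // keeps the stored prediction correctly typed
// ---------------------------------------------------------------------------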
template LEARNER::base_learner* expreplay_setup(VW::config::options_i& options, vw& all) { @@ -106,23 +99,25 @@ LEARNER::base_learner* expreplay_setup(VW::config::options_i& options, vw& all) er->all = &all; er->_random_state = all.get_random_state(); - er->buf = VW::alloc_examples(1, er->N); - er->buf->interactions = &all.interactions; - - if (er_level == 'c') - for (size_t n = 0; n < er->N; n++) er->buf[n].l.cs.costs = v_init(); + er->buf.resize(er->N); + for (auto& ex : er->buf) + { + ex.interactions = &all.interactions; + } - er->filled = calloc_or_throw(er->N); + er->filled.resize(er->N, false); if (!all.quiet) std::cerr << "experience replay level=" << er_level << ", buffer=" << er->N << ", replay count=" << er->replay_count << std::endl; - er->base = LEARNER::as_singleline(setup_base(options, all)); + // er is a unique ptr and after calling init_learner it is reset. So that we can reference base after init_learner we need to store it here. + auto base = LEARNER::as_singleline(setup_base(options, all)); + er->base = base; LEARNER::learner, example>* l = &init_learner(er, er->base, predict_or_learn, predict_or_learn); l->set_end_pass(end_pass); - + l->label_type = base->label_type; return make_base(*l); } } // namespace ExpReplay diff --git a/vowpalwabbit/ezexample.h b/vowpalwabbit/ezexample.h index d11966e666e..079f8a8965b 100644 --- a/vowpalwabbit/ezexample.h +++ b/vowpalwabbit/ezexample.h @@ -42,11 +42,12 @@ class ezexample example* get_new_example() { - example* new_ec = VW::new_unused_example(*vw_par_ref); - vw_par_ref->p->lp.default_label(&new_ec->l); + auto new_ec = VW::new_unused_example(*vw_par_ref); + vw_par_ref->p->lp.default_label(new_ec->l); new_ec->tag.clear(); new_ec->indices.clear(); - for (auto& i : new_ec->feature_space) i.clear(); + for (auto& i : new_ec->feature_space) + i.clear(); new_ec->ft_offset = 0; new_ec->num_features = 0; @@ -73,7 +74,8 @@ class ezexample quadratic_features_num = 0; quadratic_features_sqr = 0.; - for (bool& ns_exist : ns_exists) ns_exist = false; + for (bool& ns_exist : ns_exists) + ns_exist = false; example_changed_since_prediction = true; } @@ -97,7 +99,7 @@ class ezexample ezexample(vw* this_vw, bool multiline = false, vw* this_vw_parser = nullptr) { setup_new_ezexample(this_vw, multiline, this_vw_parser); - example_copies = v_init(); + example_copies.clear(); ec = get_new_example(); we_create_ec = true; @@ -115,7 +117,8 @@ class ezexample ec = this_ec; we_create_ec = false; - for (auto ns : ec->indices) ns_exists[ns] = true; + for (auto ns : ec->indices) + ns_exists[ns] = true; if (current_ns != 0) { str[0] = current_ns; @@ -131,7 +134,6 @@ class ezexample if (VW::is_ring_example(*vw_par_ref, ec)) VW::finish_example(*vw_par_ref, *ecc); example_copies.clear(); - free(example_copies.begin()); } bool ensure_ns_exists(char c) // returns TRUE iff we should ignore it :) @@ -230,7 +232,7 @@ class ezexample void mini_setup_example() { ec->partial_prediction = 0.; - ec->weight = vw_par_ref->p->lp.get_weight(&ec->l); + ec->weight = vw_par_ref->p->lp.get_weight(ec->l); ec->num_features -= quadratic_features_num; ec->total_sum_feat_sq -= quadratic_features_sqr; @@ -260,7 +262,7 @@ class ezexample float predict() { setup_for_predict(); - return ec->pred.scalar; + return ec->pred.scalar(); } float predict_partial() @@ -284,7 +286,7 @@ class ezexample else // is multiline { // we need to make a copy example* copy = get_new_example(); - VW::copy_example_data(vw_ref->audit, copy, ec, vw_par_ref->p->lp.label_size, vw_par_ref->p->lp.copy_label); + *copy = 
*ec; vw_ref->learn(*copy); example_copies.push_back(copy); } diff --git a/vowpalwabbit/feature_group.h b/vowpalwabbit/feature_group.h index f6ed984020e..4fcebc69f2d 100644 --- a/vowpalwabbit/feature_group.h +++ b/vowpalwabbit/feature_group.h @@ -273,68 +273,12 @@ struct features iterator_all end() { return iterator_all(_outer->values.end(), _outer->indicies.end(), _outer->space_names.end()); } }; - features() - { - values = v_init(); - indicies = v_init(); - space_names = v_init(); - sum_feat_sq = 0.f; - } + features() { sum_feat_sq = 0.f; } - ~features() { - values.delete_v(); - indicies.delete_v(); - space_names.delete_v(); - } - features(const features&) = delete; - features & operator=( const features& ) = delete; - - - // custom move operators required since we need to leave the old value in - // a null state to prevent freeing of shallow copied v_arrays - features(features&& other) : - values(std::move(other.values)), - indicies(std::move(other.indicies)), - space_names(std::move(other.space_names)), - sum_feat_sq(other.sum_feat_sq) - { - // We need to null out all the v_arrays to prevent double freeing during moves - auto & v = other.values; - v._begin = nullptr; - v._end = nullptr; - v.end_array = nullptr; - auto & i = other.indicies; - i._begin = nullptr; - i._end = nullptr; - i.end_array = nullptr; - auto & s = other.space_names; - s._begin = nullptr; - s._end = nullptr; - s.end_array = nullptr; - other.sum_feat_sq = 0; - } - features & operator=(features&& other) - { - values = std::move(other.values); - indicies = std::move(other.indicies); - space_names = std::move(other.space_names); - sum_feat_sq = other.sum_feat_sq; - // We need to null out all the v_arrays to prevent double freeing during moves - auto & v = other.values; - v._begin = nullptr; - v._end = nullptr; - v.end_array = nullptr; - auto & i = other.indicies; - i._begin = nullptr; - i._end = nullptr; - i.end_array = nullptr; - auto & s = other.space_names; - s._begin = nullptr; - s._end = nullptr; - s.end_array = nullptr; - other.sum_feat_sq = 0; - return *this; - } + features(const features&) = default; + features& operator=(const features&) = default; + features(features&& other) = default; + features& operator=(features&& other) = default; inline size_t size() const { return values.size(); } @@ -441,6 +385,7 @@ struct features return true; } + VW_DEPRECATED("Use copy constructor") void deep_copy_from(const features& src) { copy_array(values, src.values); diff --git a/vowpalwabbit/ftrl.cc b/vowpalwabbit/ftrl.cc index 79fc3958e07..1a393547a7c 100644 --- a/vowpalwabbit/ftrl.cc +++ b/vowpalwabbit/ftrl.cc @@ -77,7 +77,7 @@ template void predict(ftrl& b, single_learner&, example& ec) { ec.partial_prediction = GD::inline_predict(*b.all, ec); - ec.pred.scalar = GD::finalize_prediction(b.all->sd, ec.partial_prediction); + ec.pred.scalar() = GD::finalize_prediction(b.all->sd, ec.partial_prediction); if (audit) GD::print_audit_features(*(b.all), ec); } @@ -87,7 +87,7 @@ void multipredict( ftrl& b, base_learner&, example& ec, size_t count, size_t step, polyprediction* pred, bool finalize_predictions) { vw& all = *b.all; - for (size_t c = 0; c < count; c++) pred[c].scalar = ec.l.simple.initial; + for (size_t c = 0; c < count; c++) pred[c].scalar() = ec.l.simple().initial; if (b.all->weights.sparse) { GD::multipredict_info mp = { @@ -100,14 +100,14 @@ void multipredict( GD::foreach_feature, uint64_t, GD::vec_add_multipredict>(all, ec, mp); } if (all.sd->contraction != 1.) 
- for (size_t c = 0; c < count; c++) pred[c].scalar *= (float)all.sd->contraction; + for (size_t c = 0; c < count; c++) pred[c].scalar() *= (float)all.sd->contraction; if (finalize_predictions) - for (size_t c = 0; c < count; c++) pred[c].scalar = GD::finalize_prediction(all.sd, pred[c].scalar); + for (size_t c = 0; c < count; c++) pred[c].scalar() = GD::finalize_prediction(all.sd, pred[c].scalar()); if (audit) { for (size_t c = 0; c < count; c++) { - ec.pred.scalar = pred[c].scalar; + ec.pred.scalar() = pred[c].scalar(); GD::print_audit_features(all, ec); ec.ft_offset += (uint64_t)step; } @@ -229,7 +229,7 @@ void update_state_and_predict_cb(ftrl& b, single_learner&, example& ec) ec.partial_prediction = b.data.predict / ((float)((b.all->normalized_sum_norm_x + 1e-6) / b.total_weight)); - ec.pred.scalar = GD::finalize_prediction(b.all->sd, ec.partial_prediction); + ec.pred.scalar() = GD::finalize_prediction(b.all->sd, ec.partial_prediction); } void update_state_and_predict_pistol(ftrl& b, single_learner&, example& ec) @@ -238,26 +238,26 @@ void update_state_and_predict_pistol(ftrl& b, single_learner&, example& ec) GD::foreach_feature(*b.all, ec, b.data); ec.partial_prediction = b.data.predict; - ec.pred.scalar = GD::finalize_prediction(b.all->sd, ec.partial_prediction); + ec.pred.scalar() = GD::finalize_prediction(b.all->sd, ec.partial_prediction); } void update_after_prediction_proximal(ftrl& b, example& ec) { - b.data.update = b.all->loss->first_derivative(b.all->sd, ec.pred.scalar, ec.l.simple.label) * ec.weight; + b.data.update = b.all->loss->first_derivative(b.all->sd, ec.pred.scalar(), ec.l.simple().label) * ec.weight; GD::foreach_feature(*b.all, ec, b.data); } void update_after_prediction_pistol(ftrl& b, example& ec) { - b.data.update = b.all->loss->first_derivative(b.all->sd, ec.pred.scalar, ec.l.simple.label) * ec.weight; + b.data.update = b.all->loss->first_derivative(b.all->sd, ec.pred.scalar(), ec.l.simple().label) * ec.weight; GD::foreach_feature(*b.all, ec, b.data); } void update_after_prediction_cb(ftrl& b, example& ec) { - b.data.update = b.all->loss->first_derivative(b.all->sd, ec.pred.scalar, ec.l.simple.label) * ec.weight; + b.data.update = b.all->loss->first_derivative(b.all->sd, ec.pred.scalar(), ec.l.simple().label) * ec.weight; GD::foreach_feature(*b.all, ec, b.data); } @@ -425,5 +425,6 @@ base_learner* ftrl_setup(options_i& options, vw& all) l->set_multipredict(multipredict); l->set_save_load(save_load); l->set_end_pass(end_pass); + l->label_type = label_type_t::simple; return make_base(*l); } diff --git a/vowpalwabbit/gd.cc b/vowpalwabbit/gd.cc index 3a79e2c1f94..a028ccfb71c 100644 --- a/vowpalwabbit/gd.cc +++ b/vowpalwabbit/gd.cc @@ -323,7 +323,7 @@ void print_features(vw& all, example& ec) void print_audit_features(vw& all, example& ec) { if (all.audit) - print_result_by_ref(all.stdout_fileno, ec.pred.scalar, -1, ec.tag); + print_result_by_ref(all.stdout_fileno, ec.pred.scalar(), -1, ec.tag); fflush(stdout); print_features(all, ec); } @@ -356,7 +356,7 @@ inline void vec_add_trunc(trunc_data& p, const float fx, float& fw) inline float trunc_predict(vw& all, example& ec, double gravity) { - trunc_data temp = {ec.l.simple.initial, (float)gravity}; + trunc_data temp = {ec.l.simple().initial, (float)gravity}; foreach_feature(all, ec, temp); return temp.prediction; } @@ -377,7 +377,13 @@ void predict(gd& g, base_learner&, example& ec) ec.partial_prediction = inline_predict(all, ec); ec.partial_prediction *= (float)all.sd->contraction; - ec.pred.scalar = 
finalize_prediction(all.sd, ec.partial_prediction); + + if (ec.pred.get_type() != prediction_type_t::unset) + { + ec.pred.reset(); + } + + ec.pred.init_as_scalar() = finalize_prediction(all.sd, ec.partial_prediction); if (audit) print_audit_features(all, ec); } @@ -387,7 +393,7 @@ inline void vec_add_trunc_multipredict(multipredict_info& mp, const float fx, { size_t index = fi; for (size_t c = 0; c < mp.count; c++, index += mp.step) - mp.pred[c].scalar += fx * trunc_weight(mp.weights[index], mp.gravity); + mp.pred[c].scalar() += fx * trunc_weight(mp.weights[index], mp.gravity); } template @@ -395,7 +401,7 @@ void multipredict( gd& g, base_learner&, example& ec, size_t count, size_t step, polyprediction* pred, bool finalize_predictions) { vw& all = *g.all; - for (size_t c = 0; c < count; c++) pred[c].scalar = ec.l.simple.initial; + for (size_t c = 0; c < count; c++) pred[c].scalar() = ec.l.simple().initial; if (g.all->weights.sparse) { multipredict_info mp = { @@ -414,14 +420,14 @@ void multipredict( foreach_feature, uint64_t, vec_add_multipredict>(all, ec, mp); } if (all.sd->contraction != 1.) - for (size_t c = 0; c < count; c++) pred[c].scalar *= (float)all.sd->contraction; + for (size_t c = 0; c < count; c++) pred[c].scalar() *= (float)all.sd->contraction; if (finalize_predictions) - for (size_t c = 0; c < count; c++) pred[c].scalar = finalize_prediction(all.sd, pred[c].scalar); + for (size_t c = 0; c < count; c++) pred[c].scalar() = finalize_prediction(all.sd, pred[c].scalar()); if (audit) { for (size_t c = 0; c < count; c++) { - ec.pred.scalar = pred[c].scalar; + ec.pred.scalar() = pred[c].scalar(); print_audit_features(all, ec); ec.ft_offset += (uint64_t)step; } @@ -533,12 +539,12 @@ template getSquareGrad(ec.pred.scalar, ld.label); + grad_squared *= all.loss->getSquareGrad(ec.pred.scalar(), ld.label); if (grad_squared == 0 && !stateless) return 1.; @@ -601,25 +607,25 @@ template 0 - label_data& ld = ec.l.simple; + label_data& ld = ec.l.simple(); vw& all = *g.all; float update = 0.; - ec.updated_prediction = ec.pred.scalar; - if (all.loss->getLoss(all.sd, ec.pred.scalar, ld.label) > 0.) + ec.updated_prediction = ec.pred.scalar(); + if (all.loss->getLoss(all.sd, ec.pred.scalar(), ld.label) > 0.) { float pred_per_update = sensitivity(g, ec); float update_scale = get_scale(g, ec, ec.weight); if (invariant) - update = all.loss->getUpdate(ec.pred.scalar, ld.label, update_scale, pred_per_update); + update = all.loss->getUpdate(ec.pred.scalar(), ld.label, update_scale, pred_per_update); else - update = all.loss->getUnsafeUpdate(ec.pred.scalar, ld.label, update_scale); + update = all.loss->getUnsafeUpdate(ec.pred.scalar(), ld.label, update_scale); // changed from ec.partial_prediction to ld.prediction ec.updated_prediction += pred_per_update * update; if (all.reg_mode && fabs(update) > 1e-8) { - double dev1 = all.loss->first_derivative(all.sd, ec.pred.scalar, ld.label); + double dev1 = all.loss->first_derivative(all.sd, ec.pred.scalar(), ld.label); double eta_bar = (fabs(dev1) > 1e-8) ? (-update / dev1) : 0.0; if (fabs(dev1) > 1e-8) all.sd->contraction *= (1. 
- all.l2_lambda * eta_bar); @@ -629,7 +635,7 @@ float compute_update(gd& g, example& ec) } if (sparse_l2) - update -= g.sparse_l2 * ec.pred.scalar; + update -= g.sparse_l2 * ec.pred.scalar(); return update; } @@ -653,7 +659,7 @@ template 0 - assert(ec.l.simple.label != FLT_MAX); + assert(ec.l.simple().label != FLT_MAX); assert(ec.weight > 0.); g.predict(g, base, ec); update(g, base, ec); @@ -1242,6 +1248,7 @@ base_learner* setup(options_i& options, vw& all) ret.set_update(bare->update); ret.set_save_load(save_load); ret.set_end_pass(end_pass); + ret.label_type = label_type_t::simple; return make_base(ret); } diff --git a/vowpalwabbit/gd.h b/vowpalwabbit/gd.h index f0ae9896c64..82ef317d3c4 100644 --- a/vowpalwabbit/gd.h +++ b/vowpalwabbit/gd.h @@ -46,14 +46,14 @@ inline void vec_add_multipredict(multipredict_info& mp, const float fx, uint6 { i += fi; for (; i <= top; i += mp.step, ++p) - p->scalar += + p->scalar() += fx * mp.weights[i]; // TODO: figure out how to use weight_parameters::iterator (not using change_begin()) } else // TODO: this could be faster by unrolling into two loops for (size_t c = 0; c < mp.count; ++c, fi += (uint64_t)mp.step, ++p) { fi &= mask; - p->scalar += fx * mp.weights[fi]; + p->scalar() += fx * mp.weights[fi]; } } @@ -94,9 +94,9 @@ inline void foreach_feature(vw& all, example& ec, R& dat) inline float inline_predict(vw& all, example& ec) { return all.weights.sparse ? inline_predict(all.weights.sparse_weights, all.ignore_some_linear, - all.ignore_linear, *ec.interactions, all.permutations, ec, ec.l.simple.initial) + all.ignore_linear, *ec.interactions, all.permutations, ec, ec.l.simple().initial) : inline_predict(all.weights.dense_weights, all.ignore_some_linear, - all.ignore_linear, *ec.interactions, all.permutations, ec, ec.l.simple.initial); + all.ignore_linear, *ec.interactions, all.permutations, ec, ec.l.simple().initial); } inline float sign(float w) diff --git a/vowpalwabbit/gd_mf.cc b/vowpalwabbit/gd_mf.cc index 662554c7995..ea3be16770f 100644 --- a/vowpalwabbit/gd_mf.cc +++ b/vowpalwabbit/gd_mf.cc @@ -27,7 +27,6 @@ struct gdmf uint32_t rank; size_t no_win_counter; uint64_t early_stop_thres; - ~gdmf() { scalars.delete_v(); } }; void mf_print_offset_features(gdmf& d, example& ec, size_t offset) @@ -77,7 +76,7 @@ void mf_print_offset_features(gdmf& d, example& ec, size_t offset) void mf_print_audit_features(gdmf& d, example& ec, size_t offset) { - print_result_by_ref(d.all->stdout_fileno, ec.pred.scalar, -1, ec.tag); + print_result_by_ref(d.all->stdout_fileno, ec.pred.scalar(), -1, ec.tag); mf_print_offset_features(d, ec, offset); } @@ -93,7 +92,7 @@ template float mf_predict(gdmf& d, example& ec, T& weights) { vw& all = *d.all; - label_data& ld = ec.l.simple; + label_data& ld = ec.l.simple(); float prediction = ld.initial; for (std::string& i : d.all->pairs) @@ -153,15 +152,15 @@ float mf_predict(gdmf& d, example& ec, T& weights) all.set_minmax(all.sd, ld.label); - ec.pred.scalar = GD::finalize_prediction(all.sd, ec.partial_prediction); + ec.pred.scalar() = GD::finalize_prediction(all.sd, ec.partial_prediction); if (ld.label != FLT_MAX) - ec.loss = all.loss->getLoss(all.sd, ec.pred.scalar, ld.label) * ec.weight; + ec.loss = all.loss->getLoss(all.sd, ec.pred.scalar(), ld.label) * ec.weight; if (all.audit) mf_print_audit_features(d, ec, 0); - return ec.pred.scalar; + return ec.pred.scalar(); } float mf_predict(gdmf& d, example& ec) @@ -184,12 +183,12 @@ template void mf_train(gdmf& d, example& ec, T& weights) { vw& all = *d.all; - label_data& ld = 
ec.l.simple; + label_data& ld = ec.l.simple(); // use final prediction to get update size // update = eta_t*(y-y_hat) where eta_t = eta/(3*t^p) * importance weight float eta_t = all.eta / powf((float)all.sd->t + ec.weight, (float)all.power_t) / 3.f * ec.weight; - float update = all.loss->getUpdate(ec.pred.scalar, ld.label, eta_t, 1.); // ec.total_sum_feat_sq); + float update = all.loss->getUpdate(ec.pred.scalar(), ld.label, eta_t, 1.); // ec.total_sum_feat_sq); float regularization = eta_t * all.l2_lambda; @@ -317,7 +316,7 @@ void learn(gdmf& d, single_learner&, example& ec) vw& all = *d.all; mf_predict(d, ec); - if (all.training && ec.l.simple.label != FLT_MAX) + if (all.training && ec.l.simple().label != FLT_MAX) mf_train(d, ec); } @@ -377,6 +376,6 @@ base_learner* gd_mf_setup(options_i& options, vw& all) learner& l = init_learner(data, learn, predict, (UINT64_ONE << all.weights.stride_shift())); l.set_save_load(save_load); l.set_end_pass(end_pass); - + l.label_type = label_type_t::simple; return make_base(l); } diff --git a/vowpalwabbit/gen_cs_example.cc b/vowpalwabbit/gen_cs_example.cc index 6df5c39360d..2f57a7bc272 100644 --- a/vowpalwabbit/gen_cs_example.cc +++ b/vowpalwabbit/gen_cs_example.cc @@ -46,7 +46,7 @@ void gen_cs_example_ips(multi_ex& examples, COST_SENSITIVE::label& cs_labels, fl cs_labels.costs.clear(); for (uint32_t i = 0; i < examples.size(); i++) { - CB::label ld = examples[i]->l.cb; + CB::label& ld = examples[i]->l.cb(); COST_SENSITIVE::wclass wc = {0., i, 0., 0.}; if (ld.costs.size() == 1 && ld.costs[0].cost != FLT_MAX) @@ -61,7 +61,7 @@ void gen_cs_example_dm(multi_ex& examples, COST_SENSITIVE::label& cs_labels) cs_labels.costs.clear(); for (uint32_t i = 0; i < examples.size(); i++) { - CB::label ld = examples[i]->l.cb; + CB::label& ld = examples[i]->l.cb(); COST_SENSITIVE::wclass wc = {0., i, 0., 0.}; if (ld.costs.size() == 1 && ld.costs[0].cost != FLT_MAX) @@ -145,7 +145,7 @@ void gen_cs_example_mtr(cb_to_cs_adf& c, multi_ex& ec_seq, COST_SENSITIVE::label cs_labels.costs.clear(); for (size_t i = 0; i < ec_seq.size(); i++) { - CB::label ld = ec_seq[i]->l.cb; + CB::label& ld = ec_seq[i]->l.cb(); COST_SENSITIVE::wclass wc = {0, 0, 0, 0}; diff --git a/vowpalwabbit/gen_cs_example.h b/vowpalwabbit/gen_cs_example.h index a4e1656009c..53f88fb4945 100644 --- a/vowpalwabbit/gen_cs_example.h +++ b/vowpalwabbit/gen_cs_example.h @@ -8,6 +8,7 @@ #include "reductions.h" #include "cb_algs.h" #include "vw_exception.h" +#include "util.h" namespace GEN_CS { @@ -50,7 +51,7 @@ void gen_cs_example_ips(cb_to_cs& c, CB::label& ld, COST_SENSITIVE::label& cs_ld template void gen_cs_example_dm(cb_to_cs& c, example& ec, COST_SENSITIVE::label& cs_ld) { // this implements the direct estimation method, where costs are directly specified by the learned regressor. 
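// The gen_cs_example_* hunks above switch from copying CB::label by value to binding
// it by reference. With labels owning their cost arrays through RAII, a by-value copy
// would deep-copy the costs for every example in the sequence. A hedged sketch of the
// difference using stand-in types (not the VW definitions):
#include <vector>
struct demo_cb_class { float cost; unsigned action; float probability; };
struct demo_cb_label { std::vector<demo_cb_class> costs; };
inline float first_cost(demo_cb_label& stored)
{
  demo_cb_label& ld = stored;  // reference: reads the costs in place, no allocation
  // demo_cb_label ld = stored;  // by value: would copy the whole costs vector per call
  return ld.costs.empty() ? 0.f : ld.costs[0].cost;
}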
- CB::label ld = ec.l.cb; + CB::label& ld = ec.l.cb(); float min = FLT_MAX; uint32_t argmin = 1; @@ -115,7 +116,7 @@ void gen_cs_example_dm(cb_to_cs& c, example& ec, COST_SENSITIVE::label& cs_ld) } } - ec.pred.multiclass = argmin; + ec.pred.multiclass() = argmin; } template @@ -262,26 +263,31 @@ void call_cs_ldf(LEARNER::multi_learner& base, multi_ex& examples, v_arrayl.cb); + cb_labels.push_back(std::move(ec->l.cb())); prepped_cs_labels[index].costs.clear(); prepped_cs_labels[index].costs.push_back(cs_labels.costs[index]); - ec->l.cs = prepped_cs_labels[index++]; + ec->l.reset(); + ec->l.init_as_cs(std::move(prepped_cs_labels[index++])); ec->ft_offset = offset; } + swap_to_scores(examples); // 2nd: predict for each ex // // call base.predict for all examples if (is_learn) base.learn(examples, (int32_t)id); else base.predict(examples, (int32_t)id); + swap_to_probs(examples); // 3rd: restore cb_label for each example - // (**ec).l.cb = array.element. + // (**ec).l.cb() = array.element. // and restore offsets for (size_t i = 0; i < examples.size(); ++i) { - examples[i]->l.cb = cb_labels[i]; + prepped_cs_labels[i].costs = std::move(examples[i]->l.cs().costs); + examples[i]->l.reset(); + examples[i]->l.init_as_cb(std::move(cb_labels[i])); examples[i]->ft_offset = saved_offset; } } diff --git a/vowpalwabbit/global_data.cc b/vowpalwabbit/global_data.cc index dce672af3b2..4cdbb82b832 100644 --- a/vowpalwabbit/global_data.cc +++ b/vowpalwabbit/global_data.cc @@ -312,6 +312,11 @@ vw_ostream::vw_ostream() : std::ostream(&buf), buf(*this), trace_context(nullptr trace_listener = trace_listener_cerr; } +void delete_polyprediction(polyprediction& pred) +{ + pred.reset(); +} + IGNORE_DEPRECATED_USAGE_START vw::vw() { @@ -323,8 +328,6 @@ vw::vw() sd->max_label = 0; sd->min_label = 0; - label_type = label_type_t::simple; - l = nullptr; scorer = nullptr; cost_sensitive = nullptr; @@ -335,7 +338,7 @@ vw::vw() current_pass = 0; data_filename = ""; - delete_prediction = nullptr; + delete_prediction = &delete_polyprediction; bfgs = false; no_bias = false; diff --git a/vowpalwabbit/global_data.h b/vowpalwabbit/global_data.h index d08ebb77894..897d1ba8297 100644 --- a/vowpalwabbit/global_data.h +++ b/vowpalwabbit/global_data.h @@ -319,18 +319,6 @@ enum AllReduceType class AllReduce; - -enum class label_type_t -{ - simple, - cb, // contextual-bandit - cb_eval, // contextual-bandit evaluation - cs, // cost-sensitive - multi, - mc, - ccb // conditional contextual-bandit -}; - struct rand_state { private: @@ -372,6 +360,11 @@ struct vw void (*set_minmax)(shared_data* sd, float label); + label_type_t get_label_type() const + { + return l->label_type; + } + uint64_t current_pass; uint32_t num_bits; // log_2 of the number of features. @@ -462,8 +455,9 @@ struct vw // This array is required to be value initialized so that the std::vectors are constructed. std::array>, NUM_NAMESPACES> namespace_dictionaries{}; // each namespace has a list of dictionaries attached to it - - void (*delete_prediction)(void*); + + VW_DEPRECATED("Use the polyprediciton destructor") + void (*delete_prediction)(polyprediction&); bool audit; // should I print lots of debugging information? bool quiet; // Should I suppress progress-printing of updates? bool training; // Should I train if lable data is available? @@ -537,13 +531,13 @@ struct vw vw(); std::shared_ptr get_random_state() { return _random_state_sp; } - vw(const vw&) = delete; - vw& operator=(const vw&) = delete; - - // vw object cannot be moved as many objects hold a pointer to it. 
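// The call_cs_ldf hunk above follows a save/convert/restore discipline around the base
// call: move the CB label out, construct a CS label in place, run the base reduction,
// then put the CB label back. Sketched against the polylabel API that label.h (later in
// this diff) introduces; the learner plumbing is a stand-in callback, not the VW API:
#include <utility>
template <typename BaseCall>
void with_cs_label(example& ec, COST_SENSITIVE::label prepped, BaseCall call_base)
{
  CB::label saved = std::move(ec.l.cb());  // move the CB label out of the union
  ec.l.reset();                            // return the union to the unset state
  ec.l.init_as_cs(std::move(prepped));     // construct the CS label in place
  call_base();                             // base reduction sees a cost-sensitive example
  ec.l.reset();
  ec.l.init_as_cb(std::move(saved));       // restore the original CB label
}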
- // That pointer would be invalidated if it were to be moved. - vw(const vw&&) = delete; - vw& operator=(const vw&&) = delete; + vw(const vw&) = delete; + vw& operator=(const vw&) = delete; + + // vw object cannot be moved as many objects hold a pointer to it. + // That pointer would be invalidated if it were to be moved. + vw(const vw&&) = delete; + vw& operator=(const vw&&) = delete; }; VW_DEPRECATED("Use print_result_by_ref instead") diff --git a/vowpalwabbit/interact.cc b/vowpalwabbit/interact.cc index 3d9786cf6e9..7438caa1c0d 100644 --- a/vowpalwabbit/interact.cc +++ b/vowpalwabbit/interact.cc @@ -112,7 +112,8 @@ void predict_or_learn(interact& in, LEARNER::single_learner& base, example& ec) ec.num_features -= f1.size(); ec.num_features -= f2.size(); - in.feat_store.deep_copy_from(f1); + // Deep copy of features + in.feat_store = f1; multiply(f1, f2, in); ec.total_sum_feat_sq += f1.sum_feat_sq; @@ -144,7 +145,9 @@ void predict_or_learn(interact& in, LEARNER::single_learner& base, example& ec) memmove(&ec.indices[n2_i + 1], &ec.indices[n2_i], sizeof(unsigned char) * (ec.indices.size() - n2_i - 1)); ec.indices[n2_i] = in.n2; - f1.deep_copy_from(in.feat_store); + // Deep copy of features + f1 = in.feat_store; + ec.total_sum_feat_sq = in.total_sum_feat_sq; ec.num_features = in.num_features; } @@ -174,9 +177,9 @@ LEARNER::base_learner* interact_setup(options_i& options, vw& all) std::cerr << "Interacting namespaces " << data->n1 << " and " << data->n2 << std::endl; data->all = &all; - LEARNER::learner* l; - l = &LEARNER::init_learner( - data, as_singleline(setup_base(options, all)), predict_or_learn, predict_or_learn, 1); - - return make_base(*l); + auto base = as_singleline(setup_base(options, all)); + auto& l = LEARNER::init_learner( + data, base, predict_or_learn, predict_or_learn, 1); + l.label_type = base->label_type; + return make_base(l); } diff --git a/vowpalwabbit/interactions.cc b/vowpalwabbit/interactions.cc index 80ff61f672e..a786bab9f2d 100644 --- a/vowpalwabbit/interactions.cc +++ b/vowpalwabbit/interactions.cc @@ -234,7 +234,7 @@ void eval_count_of_generated_ft(vw& all, example& ec, size_t& new_features_cnt, new_features_cnt = 0; new_features_value = 0.; - v_array results = v_init(); + v_array results; if (all.permutations) { @@ -388,8 +388,6 @@ void eval_count_of_generated_ft(vw& all, example& ec, size_t& new_features_cnt, << correct_features_value << std::endl; #endif } - - results.delete_v(); } } // namespace INTERACTIONS diff --git a/vowpalwabbit/interactions_predict.h b/vowpalwabbit/interactions_predict.h index 1638761eefa..71943c7f2c4 100644 --- a/vowpalwabbit/interactions_predict.h +++ b/vowpalwabbit/interactions_predict.h @@ -106,7 +106,7 @@ inline void generate_interactions(std::vector& interactions, bool p // const uint64_t stride_shift = all.stride_shift; // it seems we don't need stride shift in FTRL-like hash // statedata for generic non-recursive iteration - v_array state_data = v_init(); + v_array state_data; feature_gen_data empty_ns_data; // micro-optimization. don't want to call its constructor each time in loop. 
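// With the special member functions of features defaulted (feature_group.h above),
// deep_copy_from reduces to plain assignment, which is what interact.cc now uses.
// A one-line equivalence, assuming v_array itself carries owning copy semantics on
// this branch:
inline void copy_features(const features& src, features& dst)
{
  dst = src;  // deep copy: copies values, indicies, space_names and sum_feat_sq
  // dst.deep_copy_from(src);  // deprecated spelling of the same operation
}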
empty_ns_data.loop_idx = 0; @@ -371,7 +371,5 @@ inline void generate_interactions(std::vector<std::string>& interactions, bool p } // while do_it } } // foreach interaction in all.interactions - - state_data.delete_v(); } } // namespace INTERACTIONS diff --git a/vowpalwabbit/io_buf.h b/vowpalwabbit/io_buf.h index 745fb6701a7..2ed57064bb4 100644 --- a/vowpalwabbit/io_buf.h +++ b/vowpalwabbit/io_buf.h @@ -69,6 +69,27 @@ class io_buf static constexpr int READ = 1; static constexpr int WRITE = 2; + io_buf(io_buf& other) = delete; + io_buf& operator=(io_buf& other) = delete; + io_buf(io_buf&& other) = delete; + io_buf& operator=(io_buf&& other) = delete; + + virtual ~io_buf() + { +#ifdef _WIN32 + int f = _fileno(stdin); +#else + int f = fileno(stdin); +#endif + + while (!files.empty() && files.last() == f) + files.pop(); + + // Calling a virtual function in a constructor or destructor will actually result + // in calling this class's implementation. Make it explicit so it is less confusing. + while (io_buf::close_file()); + } + void verify_hash(bool verify) { _verify_hash = verify; @@ -144,20 +165,10 @@ class io_buf io_buf() : _verify_hash{false}, _hash{0}, count{0}, current{0} { - space = v_init(); - files = v_init(); - currentname = v_init(); - finalname = v_init(); space.resize(INITIAL_BUFF_SIZE); head = space.begin(); } - virtual ~io_buf() - { - files.delete_v(); - space.delete_v(); - } - void set(char* p) { head = p; } virtual size_t num_files() { return files.size(); } @@ -213,12 +224,6 @@ class io_buf static void close_file_or_socket(int f); - void close_files() - { - while (close_file()) - ; - } - static bool is_socket(int f); void buf_write(char*& pointer, size_t n); diff --git a/vowpalwabbit/kernel_svm.cc b/vowpalwabbit/kernel_svm.cc index 5a369a4273f..c3f46d238ac 100644 --- a/vowpalwabbit/kernel_svm.cc +++ b/vowpalwabbit/kernel_svm.cc @@ -41,7 +41,6 @@ struct svm_example v_array<float> krow; flat_example ex; - ~svm_example(); void init_svm_example(flat_example* fec); int compute_kernels(svm_params& params); int clear_kernels(); @@ -73,9 +72,7 @@ void free_svm_model(svm_model* model) model->support_vec[i] = 0; } - model->support_vec.delete_v(); - model->alpha.delete_v(); - model->delta.delete_v(); + model->~svm_model(); free(model); } @@ -144,17 +141,6 @@ void svm_example::init_svm_example(flat_example* fec) free(fec); } -svm_example::~svm_example() -{ - krow.delete_v(); - // free flatten example contents - //flat_example* fec = &calloc_or_throw<flat_example>(); - //*fec = ex; - //free_flatten_example(fec); // free contents of flat example and frees fec. 
- if (ex.tag_len > 0) - free(ex.tag); -} - float kernel_function(const flat_example* fec1, const flat_example* fec2, void* params, size_t kernel_type); int svm_example::compute_kernels(svm_params& params) @@ -273,7 +259,6 @@ int save_load_flat_example(io_buf& model_file, bool read, flat_example*& fec) { features& fs = fec->fs; size_t len = fs.size(); - fs.values = v_init(); fs.values.resize(len); brw = model_file.bin_read_fixed((char*)fs.values.begin(), len * sizeof(feature_value), ""); if (!brw) @@ -281,7 +266,7 @@ int save_load_flat_example(io_buf& model_file, bool read, flat_example*& fec) fs.values.end() = fs.values.begin() + len; len = fs.indicies.size(); - fs.indicies = v_init(); + fs.indicies.clear(); fs.indicies.resize(len); brw = model_file.bin_read_fixed((char*)fs.indicies.begin(), len * sizeof(feature_index), ""); if (!brw) @@ -471,7 +456,7 @@ void predict(svm_params& params, single_learner&, example& ec) sec->init_svm_example(fec); float score; predict(params, &sec, &score, 1); - ec.pred.scalar = score; + ec.pred.scalar() = score; sec->~svm_example(); free(sec); } @@ -484,9 +469,9 @@ size_t suboptimality(svm_model* model, double* subopt) double max_val = 0; for (size_t i = 0; i < model->num_support; i++) { - float tmp = model->alpha[i] * model->support_vec[i]->ex.l.simple.label; + float tmp = model->alpha[i] * model->support_vec[i]->ex.l.simple().label; - if ((tmp < model->support_vec[i]->ex.l.simple.weight && model->delta[i] < 0) || (tmp > 0 && model->delta[i] > 0)) + if ((tmp < model->support_vec[i]->ex.l.simple().weight && model->delta[i] < 0) || (tmp > 0 && model->delta[i] > 0)) subopt[i] = fabs(model->delta[i]); else subopt[i] = 0; @@ -555,7 +540,7 @@ bool update(svm_params& params, size_t pos) bool overshoot = false; // params.all->opts_n_args.trace_message<<"Updating model "<num_support<<" "; svm_example* fec = model->support_vec[pos]; - label_data& ld = fec->ex.l.simple; + label_data& ld = fec->ex.l.simple(); fec->compute_kernels(params); float* inprods = fec->krow.begin(); float alphaKi = dense_dot(inprods, model->alpha, model->num_support); @@ -569,8 +554,8 @@ bool update(svm_params& params, size_t pos) // std::cout<num_support<<" "<delta[pos]<<" " << ai<<" "< fec->ex.l.simple.weight) - ai = fec->ex.l.simple.weight; + if (ai > fec->ex.l.simple().weight) + ai = fec->ex.l.simple().weight; else if (ai < 0) ai = 0; @@ -589,7 +574,7 @@ bool update(svm_params& params, size_t pos) for (size_t i = 0; i < model->num_support; i++) { - label_data& ldi = model->support_vec[i]->ex.l.simple; + label_data& ldi = model->support_vec[i]->ex.l.simple(); model->delta[i] += diff * inprods[i] * ldi.label / params.lambda; } @@ -646,7 +631,7 @@ void sync_queries(vw& all, svm_params& params, bool* train_pool) { queries = calloc_or_throw(total_sum); memcpy(queries + prev_sum, b->space.begin(), b->head - b->space.begin()); - b->space.delete_v(); + b->space.clear(); all_reduce(all, queries, total_sum); b->space.begin() = queries; @@ -667,7 +652,6 @@ void sync_queries(vw& all, svm_params& params, bool* train_pool) // for(int j = 0;j < fec->feature_map_len;j++) // params.all->opts_n_args.trace_message<feature_map[j].weight_index<<":"<feature_map[j].x<<" "; // params.all->opts_n_args.trace_message<< endl; - // params.pool[i]->in_use = true; // params.current_t += ((label_data*) params.pool[i]->ld)->weight; // params.pool[i]->example_t = params.current_t; } @@ -731,7 +715,7 @@ void train(svm_params& params) if (params._random_state->get_and_update_random() < queryp) { svm_example* fec = 
params.pool[i]; - fec->ex.l.simple.weight *= 1 / queryp; + fec->ex.l.simple().weight *= 1 / queryp; train_pool[i] = 1; } } @@ -833,9 +817,9 @@ void learn(svm_params& params, single_learner&, example& ec) sec->init_svm_example(fec); float score = 0; predict(params, &sec, &score, 1); - ec.pred.scalar = score; + ec.pred.scalar() = score; // std::cout<<"Score = "<<score<<std::endl; if (params.all->training && ec.example_counter % 100 == 0) trim_cache(params); @@ -942,5 +926,6 @@ LEARNER::base_learner* kernel_svm_setup(options_i& options, vw& all) learner<svm_params, example>& l = init_learner(params, learn, predict, 1); l.set_save_load(save_load); + l.label_type = label_type_t::simple; return make_base(l); } diff --git a/vowpalwabbit/label.h b/vowpalwabbit/label.h new file mode 100644 index 00000000000..8e518f4f0b2 --- /dev/null +++ b/vowpalwabbit/label.h @@ -0,0 +1,428 @@ +#pragma once + +/* +When a new label type needs to be added the following actions must be taken: +- LABEL_TYPE is the type that will be used +- LABEL_NAME is the name to identify this label type +Steps: + 1. Add a new variant to label_type_t called LABEL_NAME + 2. Add the corresponding row to to_string: + TO_STRING_CASE(label_type_t::LABEL_NAME) + 3. Add the new type to the union: + LABEL_TYPE _LABEL_NAME; + 4. Add the corresponding row to polylabel::copy_from + case (label_type_t::LABEL_NAME): + init_as_LABEL_NAME(other._LABEL_NAME); + break; + 5. Add the corresponding row to polylabel::move_from + case (label_type_t::LABEL_NAME): + init_as_LABEL_NAME(std::move(other._LABEL_NAME)); + break; + 6. Add the corresponding row to polylabel::reset + case (label_type_t::LABEL_NAME): + destruct(_LABEL_NAME); + break; + 7. Add the three methods that correspond to the new type according to this template + template <typename... Args> + LABEL_TYPE& init_as_LABEL_NAME(Args&&... 
args) + { + ensure_is_type(label_type_t::unset); + new (&_LABEL_NAME) LABEL_TYPE(std::forward(args)...); + _tag = label_type_t::LABEL_NAME; + return _LABEL_NAME; + } + + const LABEL_TYPE& LABEL_NAME() const + { + ensure_is_type(label_type_t::LABEL_NAME); + return _LABEL_NAME; + } + + LABEL_TYPE& LABEL_NAME() + { + ensure_is_type(label_type_t::LABEL_NAME); + return _LABEL_NAME; + } +*/ + +#include "no_label.h" +#include "simple_label.h" +#include "multiclass.h" +#include "multilabel.h" +#include "cost_sensitive.h" +#include "cb.h" +#include "example_predict.h" +#include "ccb_label.h" + +#define TO_STRING_CASE(enum_type) \ + case enum_type: \ + return #enum_type; + +enum class label_type_t +{ + unset, + empty, + simple, + multi, + cs, + cb, + conditional_contextual_bandit, + cb_eval, + multilabels +}; + +inline const char* to_string(label_type_t label_type) +{ + switch (label_type) + { + TO_STRING_CASE(label_type_t::unset) + TO_STRING_CASE(label_type_t::empty) + TO_STRING_CASE(label_type_t::simple) + TO_STRING_CASE(label_type_t::multi) + TO_STRING_CASE(label_type_t::cs) + TO_STRING_CASE(label_type_t::cb) + TO_STRING_CASE(label_type_t::conditional_contextual_bandit) + TO_STRING_CASE(label_type_t::cb_eval) + TO_STRING_CASE(label_type_t::multilabels) + default: + return ""; + } +} + +struct polylabel +{ + private: + union { + no_label::no_label _empty; + label_data _simple; + MULTICLASS::label_t _multi; + COST_SENSITIVE::label _cs; + CB::label _cb; + CCB::label _conditional_contextual_bandit; + CB_EVAL::label _cb_eval; + MULTILABEL::labels _multilabels; + }; + label_type_t _tag; + + inline void ensure_is_type(label_type_t type) const + { +#ifndef NDEBUG + if (_tag != type) + { + THROW("Expected type: " << to_string(type) << ", but found: " << to_string(_tag)); + } +#else + _UNUSED(type); +#endif + } + + template + void destruct(T& item) + { + item.~T(); + } + + // These two functions only differ by parameter + void copy_from(const polylabel& other) + { + switch (other._tag) + { + case (label_type_t::unset): + break; + case (label_type_t::empty): + init_as_empty(other._empty); + break; + case (label_type_t::simple): + init_as_simple(other._simple); + break; + case (label_type_t::multi): + init_as_multi(other._multi); + break; + case (label_type_t::cs): + init_as_cs(other._cs); + break; + case (label_type_t::cb): + init_as_cb(other._cb); + break; + case (label_type_t::conditional_contextual_bandit): + init_as_ccb(other._conditional_contextual_bandit); + break; + case (label_type_t::cb_eval): + init_as_cb_eval(other._cb_eval); + break; + case (label_type_t::multilabels): + init_as_multilabels(other._multilabels); + break; + default:; + } + } + + void move_from(polylabel&& other) + { + switch (other._tag) + { + case (label_type_t::unset): + break; + case (label_type_t::empty): + init_as_empty(std::move(other._empty)); + break; + case (label_type_t::simple): + init_as_simple(std::move(other._simple)); + break; + case (label_type_t::multi): + init_as_multi(std::move(other._multi)); + break; + case (label_type_t::cs): + init_as_cs(std::move(other._cs)); + break; + case (label_type_t::cb): + init_as_cb(std::move(other._cb)); + break; + case (label_type_t::conditional_contextual_bandit): + init_as_ccb(std::move(other._conditional_contextual_bandit)); + break; + case (label_type_t::cb_eval): + init_as_cb_eval(std::move(other._cb_eval)); + break; + case (label_type_t::multilabels): + init_as_multilabels(std::move(other._multilabels)); + break; + default:; + } + } + + public: + polylabel() { _tag = 
label_type_t::unset; // Perhaps we should memset here? + }; + ~polylabel() { reset(); } + + polylabel(polylabel&& other) + { + _tag = label_type_t::unset; + move_from(std::move(other)); + } + + polylabel& operator=(polylabel&& other) + { + reset(); + move_from(std::move(other)); + return *this; + } + + polylabel(const polylabel& other) { + _tag = label_type_t::unset; + copy_from(other); + } + + polylabel& operator=(const polylabel& other) { + reset(); + copy_from(other); + return *this; + } + + label_type_t get_type() const { return _tag; } + + void reset() + { + switch (_tag) + { + case (label_type_t::unset): + // Nothing to do! Whatever was in here has already been destroyed. + return; + case (label_type_t::empty): + destruct(_empty); + break; + case (label_type_t::simple): + destruct(_simple); + break; + case (label_type_t::multi): + destruct(_multi); + break; + case (label_type_t::cs): + destruct(_cs); + break; + case (label_type_t::cb): + destruct(_cb); + break; + case (label_type_t::conditional_contextual_bandit): + destruct(_conditional_contextual_bandit); + break; + case (label_type_t::cb_eval): + destruct(_cb_eval); + break; + case (label_type_t::multilabels): + destruct(_multilabels); + break; + default:; + } + + _tag = label_type_t::unset; + } + + template + no_label::no_label& init_as_empty(Args&&... args) + { + ensure_is_type(label_type_t::unset); + new (&_empty) no_label::no_label(std::forward(args)...); + _tag = label_type_t::empty; + return _empty; + } + + const no_label::no_label& empty() const + { + ensure_is_type(label_type_t::empty); + return _empty; + } + + no_label::no_label& empty() + { + ensure_is_type(label_type_t::empty); + return _empty; + } + + template + label_data& init_as_simple(Args&&... args) + { + ensure_is_type(label_type_t::unset); + new (&_simple) label_data(std::forward(args)...); + _tag = label_type_t::simple; + return _simple; + } + + const label_data& simple() const + { + ensure_is_type(label_type_t::simple); + return _simple; + } + + label_data& simple() + { + ensure_is_type(label_type_t::simple); + return _simple; + } + + template + MULTICLASS::label_t& init_as_multi(Args&&... args) + { + ensure_is_type(label_type_t::unset); + new (&_multi) MULTICLASS::label_t(std::forward(args)...); + _tag = label_type_t::multi; + return _multi; + } + + const MULTICLASS::label_t& multi() const + { + ensure_is_type(label_type_t::multi); + return _multi; + } + + MULTICLASS::label_t& multi() + { + ensure_is_type(label_type_t::multi); + return _multi; + } + + template + COST_SENSITIVE::label& init_as_cs(Args&&... args) + { + ensure_is_type(label_type_t::unset); + new (&_cs) COST_SENSITIVE::label(std::forward(args)...); + _tag = label_type_t::cs; + return _cs; + } + + const COST_SENSITIVE::label& cs() const + { + ensure_is_type(label_type_t::cs); + return _cs; + } + + COST_SENSITIVE::label& cs() + { + ensure_is_type(label_type_t::cs); + return _cs; + } + + template + CB::label& init_as_cb(Args&&... args) + { + ensure_is_type(label_type_t::unset); + new (&_cb) CB::label(std::forward(args)...); + _tag = label_type_t::cb; + return _cb; + } + const CB::label& cb() const + { + ensure_is_type(label_type_t::cb); + return _cb; + } + + CB::label& cb() + { + ensure_is_type(label_type_t::cb); + return _cb; + } + + template + CCB::label& init_as_ccb(Args&&... 
args) + { + ensure_is_type(label_type_t::unset); + new (&_conditional_contextual_bandit) CCB::label(std::forward(args)...); + _tag = label_type_t::conditional_contextual_bandit; + return _conditional_contextual_bandit; + } + + const CCB::label& ccb() const + { + ensure_is_type(label_type_t::conditional_contextual_bandit); + return _conditional_contextual_bandit; + } + + CCB::label& ccb() + { + ensure_is_type(label_type_t::conditional_contextual_bandit); + return _conditional_contextual_bandit; + } + + template + CB_EVAL::label& init_as_cb_eval(Args&&... args) + { + ensure_is_type(label_type_t::unset); + new (&_cb_eval) CB_EVAL::label(std::forward(args)...); + _tag = label_type_t::cb_eval; + return _cb_eval; + } + + const CB_EVAL::label& cb_eval() const + { + ensure_is_type(label_type_t::cb_eval); + return _cb_eval; + } + + CB_EVAL::label& cb_eval() + { + ensure_is_type(label_type_t::cb_eval); + return _cb_eval; + } + + template + MULTILABEL::labels& init_as_multilabels(Args&&... args) + { + ensure_is_type(label_type_t::unset); + new (&_multilabels) MULTILABEL::labels(std::forward(args)...); + _tag = label_type_t::multilabels; + return _multilabels; + } + + const MULTILABEL::labels& multilabels() const + { + ensure_is_type(label_type_t::multilabels); + return _multilabels; + } + + MULTILABEL::labels& multilabels() + { + ensure_is_type(label_type_t::multilabels); + return _multilabels; + } +}; diff --git a/vowpalwabbit/label_dictionary.cc b/vowpalwabbit/label_dictionary.cc index 3eeeb79f01c..c1f19aabda3 100644 --- a/vowpalwabbit/label_dictionary.cc +++ b/vowpalwabbit/label_dictionary.cc @@ -95,9 +95,7 @@ void set_label_features(label_feature_map& lfm, size_t lab, features& fs) { if (lfm.find(lab) == lfm.end()) return; - features tmp_features; - tmp_features.deep_copy_from(fs); - lfm.emplace(lab, std::move(tmp_features)); + lfm.emplace(lab, fs); } } // namespace LabelDict diff --git a/vowpalwabbit/label_parser.cc b/vowpalwabbit/label_parser.cc new file mode 100644 index 00000000000..79bddd8c5c5 --- /dev/null +++ b/vowpalwabbit/label_parser.cc @@ -0,0 +1,12 @@ +#include "label_parser.h" +#include "label.h" + +void polylabel_copy_label(polylabel& left, polylabel& right) +{ + left = right; +} + +void polylabel_delete_label(polylabel& label) +{ + label.reset(); +} \ No newline at end of file diff --git a/vowpalwabbit/label_parser.h b/vowpalwabbit/label_parser.h index 3a8fa533070..1ab18049f4a 100644 --- a/vowpalwabbit/label_parser.h +++ b/vowpalwabbit/label_parser.h @@ -12,19 +12,26 @@ struct parser; struct shared_data; +struct polylabel; + +void polylabel_copy_label(polylabel& left, polylabel& right); +void polylabel_delete_label(polylabel& label); struct label_parser { - void (*default_label)(void*); - void (*parse_label)(parser*, shared_data*, void*, v_array&); - void (*cache_label)(void*, io_buf& cache); - size_t (*read_cached_label)(shared_data*, void*, io_buf& cache); - void (*delete_label)(void*); - float (*get_weight)(void*); - void (*copy_label)(void*, void*); // copy_label(dst,src) performs a DEEP copy of src into dst (dst is allocated + void (*default_label)(polylabel&); + void (*parse_label)(parser*, shared_data*, polylabel&, v_array&); + void (*cache_label)(polylabel&, io_buf& cache); + size_t (*read_cached_label)(shared_data*, polylabel&, io_buf& cache); + VW_DEPRECATED("Removed") + void (*delete_label)(polylabel&); + float (*get_weight)(polylabel&); + VW_DEPRECATED("Removed") + void (*copy_label)(polylabel&, polylabel&); // copy_label(dst,src) performs a DEEP copy of src into dst 
(dst is allocated // correctly). if this function is nullptr, then we assume that a memcpy of size // label_size is sufficient, so you need only specify this function if your label - // constains, for instance, pointers (otherwise you'll get double-free errors) - bool (*test_label)(void*); + // constains, for instance, pointers (otherwise you'll get double-free errors) size_t label_size; + bool (*test_label)(polylabel&); + VW_DEPRECATED("Removed") size_t label_size; }; diff --git a/vowpalwabbit/lda_core.cc b/vowpalwabbit/lda_core.cc index a857ebef068..a8aaaf47f5b 100644 --- a/vowpalwabbit/lda_core.cc +++ b/vowpalwabbit/lda_core.cc @@ -87,18 +87,6 @@ struct lda inline float powf(float x, float p); inline void expdigammify(vw &all, float *gamma); inline void expdigammify_2(vw &all, float *gamma, float *norm); - - ~lda() - { - Elogtheta.delete_v(); - decay_levels.delete_v(); - total_new.delete_v(); - examples.delete_v(); - total_lambda.delete_v(); - doc_lengths.delete_v(); - digammas.delete_v(); - v.delete_v(); - } }; // #define VW_NO_INLINE_SIMD @@ -677,8 +665,9 @@ static inline float find_cw(lda &l, float *u_for_w, float *v) namespace { // Effectively, these are static and not visible outside the compilation unit. -v_array new_gamma = v_init(); -v_array old_gamma = v_init(); +// TODO: Make these non global as it makes this code non threadsafe +v_array new_gamma; +v_array old_gamma; } // namespace // Returns an estimate of the part of the variational bound that @@ -731,10 +720,10 @@ float lda_loop(lda &l, v_array &Elogtheta, float *v, example *ec, float) for (size_t k = 0; k < l.topics; k++) new_gamma[k] = new_gamma[k] * v[k] + l.lda_alpha; } while (average_diff(*l.all, old_gamma.begin(), new_gamma.begin()) > l.lda_epsilon); - ec->pred.scalars.clear(); - ec->pred.scalars.resize(l.topics); - memcpy(ec->pred.scalars.begin(), new_gamma.begin(), l.topics * sizeof(float)); - ec->pred.scalars.end() = ec->pred.scalars.begin() + l.topics; + ec->pred.scalars().clear(); + ec->pred.scalars().resize(l.topics); + memcpy(ec->pred.scalars().begin(), new_gamma.begin(), l.topics * sizeof(float)); + ec->pred.scalars().end() = ec->pred.scalars().begin() + l.topics; score += theta_kl(l, Elogtheta, new_gamma.begin()); @@ -846,7 +835,7 @@ void save_load(lda &l, io_buf &model_file, bool read, bool text) void return_example(vw &all, example &ec) { all.sd->update(ec.test_only, true, ec.loss, ec.weight, ec.num_features); - for (int f : all.final_prediction_sink) MWT::print_scalars(f, ec.pred.scalars, ec.tag); + for (int f : all.final_prediction_sink) MWT::print_scalars(f, ec.pred.scalars(), ec.tag); if (all.sd->weighted_examples() >= all.sd->dump_interval && !all.quiet) all.sd->print_update( @@ -866,12 +855,12 @@ void learn_batch(lda &l) // do in this case, we just return. 
for (size_t d = 0; d < l.examples.size(); d++) { - l.examples[d]->pred.scalars.clear(); - l.examples[d]->pred.scalars.resize(l.topics); - memset(l.examples[d]->pred.scalars.begin(), 0, l.topics * sizeof(float)); - l.examples[d]->pred.scalars.end() = l.examples[d]->pred.scalars.begin() + l.topics; + l.examples[d]->pred.scalars().clear(); + l.examples[d]->pred.scalars().resize(l.topics); + memset(l.examples[d]->pred.scalars().begin(), 0, l.topics * sizeof(float)); + l.examples[d]->pred.scalars().end() = l.examples[d]->pred.scalars().begin() + l.topics; - l.examples[d]->pred.scalars.clear(); + l.examples[d]->pred.scalars().clear(); return_example(*l.all, *l.examples[d]); } l.examples.clear(); @@ -994,6 +983,18 @@ void learn(lda &l, LEARNER::single_learner &, example &ec) uint32_t num_ex = (uint32_t)l.examples.size(); l.examples.push_back(&ec); l.doc_lengths.push_back(0); + + // The contract of a reduction is that prediction and label must be valid on the way in and out. + // In the LDA batch, examples are cleared and so it breaks this contract. Copying them here only + // for the final example allows us to support that. This is not great either and should be revisited. + polylabel pl; + polyprediction pp; + if (num_ex + 1 == l.minibatch) + { + pl = ec.l; + pp = ec.pred; + } + for (features &fs : ec) { for (features::iterator &f : fs) @@ -1003,8 +1004,12 @@ void learn(lda &l, LEARNER::single_learner &, example &ec) l.doc_lengths[num_ex] += (int)f.value(); } } - if (++num_ex == l.minibatch) + if (num_ex + 1 == l.minibatch) + { learn_batch(l); + ec.l = std::move(pl); + ec.pred = std::move(pp); + } } void learn_with_metrics(lda &l, LEARNER::single_learner &base, example &ec) @@ -1315,7 +1320,6 @@ LEARNER::base_learner *lda_setup(options_i &options, vw &all) return nullptr; all.lda = (uint32_t)ld->topics; - all.delete_prediction = delete_scalars; ld->sorted_features = std::vector(); ld->total_lambda_init = false; ld->all = &all; @@ -1361,6 +1365,6 @@ LEARNER::base_learner *lda_setup(options_i &options, vw &all) l.set_finish_example(finish_example); l.set_end_examples(end_examples); l.set_end_pass(end_pass); - + l.label_type = label_type_t::empty; return make_base(l); } diff --git a/vowpalwabbit/learner.cc b/vowpalwabbit/learner.cc index f1b5e9a25a2..d7f3e97c66a 100644 --- a/vowpalwabbit/learner.cc +++ b/vowpalwabbit/learner.cc @@ -8,28 +8,6 @@ #include "parse_regressor.h" #include "parse_dispatch_loop.h" - -#define CASE(type) \ - case type: \ - return #type; - -const char* to_string(prediction_type_t prediction_type) -{ - switch (prediction_type) - { - CASE(prediction_type_t::scalar) - CASE(prediction_type_t::scalars) - CASE(prediction_type_t::action_scores) - CASE(prediction_type_t::action_probs) - CASE(prediction_type_t::multiclass) - CASE(prediction_type_t::multilabels) - CASE(prediction_type_t::prob) - CASE(prediction_type_t::multiclassprobs) - default: - return ""; - } -} - namespace LEARNER { void learn_ex(example& ec, vw& all) @@ -72,7 +50,7 @@ inline bool example_is_newline_not_header(example& ec, vw& all) { // If we are using CCB, test against CCB implementation otherwise fallback to previous behavior. 
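// The lda_core.cc change above snapshots the final example's label and prediction
// before learn_batch() clears the minibatch, then restores them so the example leaves
// learn() with valid contents. The pattern, sketched with the polylabel and
// polyprediction value types this diff introduces (batch body elided):
#include <utility>
template <typename RunBatch>
void learn_last_in_batch(example& ec, RunBatch run_batch)
{
  polylabel saved_label = ec.l;     // deep copy via copy assignment
  polyprediction saved_pred = ec.pred;
  run_batch();                      // may clear or overwrite ec.l and ec.pred
  ec.l = std::move(saved_label);    // restore: the reduction contract holds on exit
  ec.pred = std::move(saved_pred);
}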
bool is_header = false; - if (all.label_type == label_type_t::ccb) + if (all.get_label_type() == label_type_t::conditional_contextual_bandit) { is_header = CCB::ec_is_example_header(ec); } @@ -168,7 +146,7 @@ class multi_example_handler bool complete_multi_ex(example* ec) { auto& master = _context.get_master(); - const bool is_test_ec = master.p->lp.test_label(&ec->l); + const bool is_test_ec = master.p->lp.test_label(ec->l); const bool is_newline = (example_is_newline_not_header(*ec, master) && is_test_ec); if (!is_newline) { diff --git a/vowpalwabbit/learner.h b/vowpalwabbit/learner.h index 7959f5b25b0..29c6492c8e8 100644 --- a/vowpalwabbit/learner.h +++ b/vowpalwabbit/learner.h @@ -8,24 +8,10 @@ #include "multiclass.h" #include "simple_label.h" #include "parser.h" +#include "prediction.h" #include "future_compat.h" -#include - -enum class prediction_type_t -{ - scalar, - scalars, - action_scores, - action_probs, - multiclass, - multilabels, - prob, - multiclassprobs, - decision_probs -}; - -const char* to_string(prediction_type_t prediction_type); +#include namespace LEARNER { @@ -56,8 +42,8 @@ inline func_data tuple_dbf(void* data, base_learner* base, void (*func)(void*)) struct learn_data { using fn = void (*)(void* data, base_learner& base, void* ex); - using multi_fn = void (*)(void* data, base_learner& base, void* ex, size_t count, size_t step, polyprediction* pred, - bool finalize_predictions); + using multi_fn = void (*)(void* data, base_learner& base, void* ex, size_t count, size_t step, + polyprediction* pred, bool finalize_predictions); void* data; base_learner* base; @@ -128,6 +114,52 @@ inline void decrement_offset(multi_ex& ec_seq, const size_t increment, const siz } } +template +void check_prediction_state(T& example_obj, prediction_type_t pred_type) = delete; + +template <> +inline void check_prediction_state(example& example_obj, prediction_type_t pred_type) +{ + // The compiler sees these as unused as the only place they are used in an assert statement. + _UNUSED(pred_type); + _UNUSED(example_obj); + assert(example_obj.pred.get_type() == pred_type); +} + +template <> +inline void check_prediction_state(multi_ex& example_obj, prediction_type_t pred_type) +{ + _UNUSED(pred_type); + _UNUSED(example_obj); + if (example_obj.size() > 0) + { + assert(example_obj[0]->pred.get_type() == pred_type); + } +} + +template +void check_label_state(T& example_obj, label_type_t label_type) = delete; + +template <> +inline void check_label_state(example& example_obj, label_type_t label_type) +{ + // The compiler sees these as unused as the only place they are used in an assert statement. + _UNUSED(label_type); + _UNUSED(example_obj); + assert(example_obj.l.get_type() == label_type); +} + +template <> +inline void check_label_state(multi_ex& example_obj, label_type_t label_type) +{ + _UNUSED(label_type); + _UNUSED(example_obj); + if (example_obj.size() > 0) + { + assert(example_obj[0]->l.get_type() == label_type); + } +} + template struct learner { @@ -145,6 +177,7 @@ struct learner learner(){}; // Should only be able to construct a learner through init_learner function public: prediction_type_t pred_type; + label_type_t label_type; size_t weights; // this stores the number of "weight vectors" required by the learner. size_t increment; bool is_multiline; // Is this a single-line or multi-line reduction? 
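// check_prediction_state and check_label_state above assert the reduction contract in
// debug builds: an example must enter and leave learn()/predict() carrying the label
// and prediction types the learner declared. The shape of the check, shown with
// stand-in tagged types (the real helpers compile away under NDEBUG):
#include <cassert>
enum class demo_kind { unset, scalar, multiclass };
struct demo_example { demo_kind label_kind = demo_kind::unset; demo_kind pred_kind = demo_kind::unset; };
inline void check_state(const demo_example& ec, demo_kind expected_label, demo_kind expected_pred)
{
  assert(ec.label_kind == expected_label);  // fires if a reduction left the wrong label type
  assert(ec.pred_kind == expected_pred);    // fires if a reduction left the wrong prediction type
}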
@@ -157,18 +190,30 @@ struct learner { assert((is_multiline && std::is_same::value) || (!is_multiline && std::is_same::value)); // sanity check under debug compile + check_prediction_state(ec, pred_type); + check_label_state(ec, label_type); + increment_offset(ec, increment, i); learn_fd.learn_f(learn_fd.data, *learn_fd.base, (void*)&ec); decrement_offset(ec, increment, i); + + check_prediction_state(ec, pred_type); + check_label_state(ec, label_type); } inline void predict(E& ec, size_t i = 0) { assert((is_multiline && std::is_same::value) || (!is_multiline && std::is_same::value)); // sanity check under debug compile + check_prediction_state(ec, pred_type); + check_label_state(ec, label_type); + increment_offset(ec, increment, i); learn_fd.predict_f(learn_fd.data, *learn_fd.base, (void*)&ec); decrement_offset(ec, increment, i); + + check_prediction_state(ec, pred_type); + check_label_state(ec, label_type); } inline void multipredict(E& ec, size_t lo, size_t count, polyprediction* pred, bool finalize_predictions) @@ -184,7 +229,7 @@ struct learner if (finalize_predictions) pred[c] = ec.pred; // TODO: this breaks for complex labels because = doesn't do deep copy! else - pred[c].scalar = ec.partial_prediction; + pred[c].scalar() = ec.partial_prediction; // pred[c].scalar = finalize_prediction ec.partial_prediction; // TODO: this breaks for complex labels because = // doesn't do deep copy! // note works if ec.partial_prediction, but only if finalize_prediction is run???? increment_offset(ec, increment, 1); @@ -307,8 +352,8 @@ struct learner } template - static learner& init_learner(T* dat, L* base, void (*learn)(T&, L&, E&), void (*predict)(T&, L&, E&), size_t ws, - prediction_type_t pred_type) + static learner& init_learner( + T* dat, L* base, void (*learn)(T&, L&, E&), void (*predict)(T&, L&, E&), size_t ws, prediction_type_t pred_type) { learner& ret = calloc_or_throw >(); @@ -355,6 +400,7 @@ struct learner ret.learn_fd.predict_f = (learn_data::fn)predict; ret.learn_fd.multipredict_f = nullptr; ret.pred_type = pred_type; + ret.label_type = label_type_t::unset; ret.is_multiline = std::is_same::value; return ret; @@ -376,8 +422,8 @@ template learner& init_learner( free_ptr& dat, void (*learn)(T&, L&, E&), void (*predict)(T&, L&, E&), size_t params_per_weight) { - auto ret = - &learner::init_learner(dat.get(), (L*)nullptr, learn, predict, params_per_weight, prediction_type_t::scalar); + auto ret = &learner::init_learner( + dat.get(), (L*)nullptr, learn, predict, params_per_weight, prediction_type_t::scalar); dat.release(); return *ret; @@ -431,8 +477,7 @@ learner& init_learner(L* base, void (*learn)(T&, L&, E&), void (*predict)( // multiclass reduction template learner& init_multiclass_learner(free_ptr& dat, L* base, void (*learn)(T&, L&, E&), - void (*predict)(T&, L&, E&), parser* p, size_t ws, - prediction_type_t pred_type = prediction_type_t::multiclass) + void (*predict)(T&, L&, E&), parser* p, size_t ws, prediction_type_t pred_type = prediction_type_t::multiclass) { learner& l = learner::init_learner(dat.get(), base, learn, predict, ws, pred_type); @@ -444,8 +489,7 @@ learner& init_multiclass_learner(free_ptr& dat, L* base, void (*learn)( template learner& init_cost_sensitive_learner(free_ptr& dat, L* base, void (*learn)(T&, L&, E&), - void (*predict)(T&, L&, E&), parser* p, size_t ws, - prediction_type_t pred_type = prediction_type_t::multiclass) + void (*predict)(T&, L&, E&), parser* p, size_t ws, prediction_type_t pred_type = prediction_type_t::multiclass) { learner& l = 
learner::init_learner(dat.get(), base, learn, predict, ws, pred_type); dat.release(); diff --git a/vowpalwabbit/log_multi.cc b/vowpalwabbit/log_multi.cc index 6d5e63d10fd..dee18b127cf 100644 --- a/vowpalwabbit/log_multi.cc +++ b/vowpalwabbit/log_multi.cc @@ -47,7 +47,7 @@ class node_pred } }; -typedef struct +struct node { // everyone has uint32_t parent; // the parent node @@ -68,7 +68,7 @@ typedef struct // leaf has uint32_t max_count; // the number of samples of the most common label uint32_t max_count_label; // the most common label -} node; +}; struct log_multi { @@ -83,13 +83,6 @@ struct log_multi uint32_t swap_resist; uint32_t nbofswaps; - - ~log_multi() - { - // save_node_stats(b); - for (auto& node : nodes) node.preds.delete_v(); - nodes.delete_v(); - } }; inline void init_leaf(node& n) @@ -112,7 +105,6 @@ inline node init_node() node.parent = 0; node.min_count = 0; - node.preds = v_init(); init_leaf(node); return node; @@ -251,13 +243,13 @@ void train_node( log_multi& b, single_learner& base, example& ec, uint32_t& current, uint32_t& class_index, uint32_t /* depth */) { if (b.nodes[current].norm_Eh > b.nodes[current].preds[class_index].norm_Ehk) - ec.l.simple.label = -1.f; + ec.l.simple().label = -1.f; else - ec.l.simple.label = 1.f; + ec.l.simple().label = 1.f; base.learn(ec, b.nodes[current].base_predictor); // depth - ec.l.simple.label = FLT_MAX; + ec.l.simple().label = FLT_MAX; base.predict(ec, b.nodes[current].base_predictor); // depth b.nodes[current].Eh += (double)ec.partial_prediction; @@ -302,47 +294,58 @@ inline uint32_t descend(node& n, float prediction) void predict(log_multi& b, single_learner& base, example& ec) { - MULTICLASS::label_t mc = ec.l.multi; + MULTICLASS::label_t mc = ec.l.multi(); + + ec.l.reset(); + ec.l.init_as_simple(FLT_MAX, 0.f, 0.f); + ec.pred.reset(); + ec.pred.init_as_scalar(); - ec.l.simple = {FLT_MAX, 0.f, 0.f}; uint32_t cn = 0; uint32_t depth = 0; while (b.nodes[cn].internal) { base.predict(ec, b.nodes[cn].base_predictor); // depth - cn = descend(b.nodes[cn], ec.pred.scalar); + cn = descend(b.nodes[cn], ec.pred.scalar()); depth++; } - ec.pred.multiclass = b.nodes[cn].max_count_label; - ec.l.multi = mc; + ec.pred.reset(); + ec.pred.init_as_multiclass() = b.nodes[cn].max_count_label; + ec.l.reset(); + ec.l.init_as_multi() = mc; } void learn(log_multi& b, single_learner& base, example& ec) { // verify_min_dfs(b, b.nodes[0]); - if (ec.l.multi.label == (uint32_t)-1 || b.progress) + if (ec.l.multi().label == (uint32_t)-1 || b.progress) predict(b, base, ec); - if (ec.l.multi.label != (uint32_t)-1) // if training the tree + if (ec.l.multi().label != (uint32_t)-1) // if training the tree { - MULTICLASS::label_t mc = ec.l.multi; - uint32_t start_pred = ec.pred.multiclass; + MULTICLASS::label_t mc = ec.l.multi(); + uint32_t start_pred = ec.pred.multiclass(); uint32_t class_index = 0; - ec.l.simple = {FLT_MAX, 0.f, 0.f}; + ec.l.reset(); + ec.l.init_as_simple(FLT_MAX, 0.f, 0.f); + ec.pred.reset(); + ec.pred.init_as_scalar(); uint32_t cn = 0; uint32_t depth = 0; while (children(b, cn, class_index, mc.label)) { train_node(b, base, ec, cn, class_index, depth); - cn = descend(b.nodes[cn], ec.pred.scalar); + cn = descend(b.nodes[cn], ec.pred.scalar()); depth++; } b.nodes[cn].min_count++; update_min_count(b, cn); - ec.pred.multiclass = start_pred; - ec.l.multi = mc; + ec.pred.reset(); + ec.pred.init_as_multiclass() = start_pred; + ec.l.reset(); + ec.l.init_as_multi() = mc; } } @@ -520,6 +523,6 @@ base_learner* log_multi_setup(options_i& options, vw& all) // 
learner setup learner& l = init_multiclass_learner( data, as_singleline(setup_base(options, all)), learn, predict, all.p, data->max_predictors); l.set_save_load(save_load_tree); - + l.label_type = label_type_t::multi; return make_base(l); } diff --git a/vowpalwabbit/lrq.cc b/vowpalwabbit/lrq.cc index ab9b617f891..9f3a7d0185f 100644 --- a/vowpalwabbit/lrq.cc +++ b/vowpalwabbit/lrq.cc @@ -40,8 +40,7 @@ inline float cheesyrand(uint64_t x) return merand48(seed); } - -constexpr inline bool example_is_test(example& ec) { return ec.l.simple.label == FLT_MAX; } +inline bool example_is_test(example& ec) { return ec.l.simple().label == FLT_MAX; } void reset_seed(LRQstate& lrq) { @@ -140,13 +139,13 @@ void predict_or_learn(LRQstate& lrq, single_learner& base, example& ec) // Restore example if (iter == 0) { - first_prediction = ec.pred.scalar; + first_prediction = ec.pred.scalar(); first_loss = ec.loss; first_uncertainty = ec.confidence; } else { - ec.pred.scalar = first_prediction; + ec.pred.scalar() = first_prediction; ec.loss = first_loss; ec.confidence = first_uncertainty; } @@ -213,7 +212,7 @@ base_learner* lrq_setup(options_i& options, vw& all) learner& l = init_learner( lrq, as_singleline(setup_base(options, all)), predict_or_learn, predict_or_learn, 1 + maxk); l.set_end_pass(reset_seed); - + l.label_type = label_type_t::simple; // TODO: leaks memory ? return make_base(l); } diff --git a/vowpalwabbit/lrqfa.cc b/vowpalwabbit/lrqfa.cc index 3df6e8ac2d6..af29a8dbd90 100644 --- a/vowpalwabbit/lrqfa.cc +++ b/vowpalwabbit/lrqfa.cc @@ -26,7 +26,7 @@ inline float cheesyrand(uint64_t x) return merand48(seed); } -constexpr inline bool example_is_test(example& ec) { return ec.l.simple.label == FLT_MAX; } +inline bool example_is_test(example& ec) { return ec.l.simple().label == FLT_MAX; } template void predict_or_learn(LRQFAstate& lrq, single_learner& base, example& ec) @@ -109,12 +109,12 @@ void predict_or_learn(LRQFAstate& lrq, single_learner& base, example& ec) // Restore example if (iter == 0) { - first_prediction = ec.pred.scalar; + first_prediction = ec.pred.scalar(); first_loss = ec.loss; } else { - ec.pred.scalar = first_prediction; + ec.pred.scalar() = first_prediction; ec.loss = first_loss; } @@ -158,6 +158,6 @@ LEARNER::base_learner* lrqfa_setup(options_i& options, vw& all) all.wpp = all.wpp * (uint64_t)(1 + lrq->k); learner& l = init_learner(lrq, as_singleline(setup_base(options, all)), predict_or_learn, predict_or_learn, 1 + lrq->field_name.size() * lrq->k); - + l.label_type = label_type_t::simple; return make_base(l); } diff --git a/vowpalwabbit/marginal.cc b/vowpalwabbit/marginal.cc index c0e7550548c..a0f7c5702fa 100644 --- a/vowpalwabbit/marginal.cc +++ b/vowpalwabbit/marginal.cc @@ -56,7 +56,7 @@ template void make_marginal(data& sm, example& ec) { uint64_t mask = sm.all->weights.mask(); - float label = ec.l.simple.label; + float label = ec.l.simple().label; vw& all = *sm.all; sm.alg_loss = 0.; sm.net_weight = 0.; @@ -132,7 +132,7 @@ void compute_expert_loss(data& sm, example& ec) { vw& all = *sm.all; // add in the feature-based expert and normalize, - float label = ec.l.simple.label; + float label = ec.l.simple().label; if (sm.net_weight + sm.net_feature_weight > 0.) 
sm.average_pred += sm.net_feature_weight * sm.feature_pred; @@ -143,7 +143,7 @@ void compute_expert_loss(data& sm, example& ec) } float inv_weight = 1.0f / (sm.net_weight + sm.net_feature_weight); sm.average_pred *= inv_weight; - ec.pred.scalar = sm.average_pred; + ec.pred.scalar() = sm.average_pred; ec.partial_prediction = sm.average_pred; if (is_learn) @@ -157,7 +157,7 @@ void update_marginal(data& sm, example& ec) { vw& all = *sm.all; uint64_t mask = sm.all->weights.mask(); - float label = ec.l.simple.label; + float label = ec.l.simple().label; float weight = ec.weight; if (sm.unweighted_marginals) weight = 1.; @@ -189,7 +189,7 @@ void update_marginal(data& sm, example& ec) e.second.weight = get_adanormalhedge_weights(e.second.regret, e.second.abs_regret); } - m.first = m.first * (1. - sm.decay) + ec.l.simple.label * weight; + m.first = m.first * (1. - sm.decay) + ec.l.simple().label * weight; m.second = m.second * (1. - sm.decay) + weight; } } @@ -203,7 +203,7 @@ void predict_or_learn(data& sm, LEARNER::single_learner& base, example& ec) if (sm.update_before_learn) { base.predict(ec); - float pred = ec.pred.scalar; + float pred = ec.pred.scalar(); if (sm.compete) { sm.feature_pred = pred; @@ -213,14 +213,14 @@ void predict_or_learn(data& sm, LEARNER::single_learner& base, example& ec) update_marginal(sm, ec); // update features before learning. make_marginal(sm, ec); base.learn(ec); - ec.pred.scalar = pred; + ec.pred.scalar() = pred; } else { base.learn(ec); if (sm.compete) { - sm.feature_pred = ec.pred.scalar; + sm.feature_pred = ec.pred.scalar(); compute_expert_loss(sm, ec); } update_marginal(sm, ec); @@ -228,7 +228,7 @@ void predict_or_learn(data& sm, LEARNER::single_learner& base, example& ec) else { base.predict(ec); - float pred = ec.pred.scalar; + float pred = ec.pred.scalar(); if (sm.compete) { sm.feature_pred = pred; @@ -381,6 +381,6 @@ LEARNER::base_learner* marginal_setup(options_i& options, vw& all) LEARNER::learner& ret = init_learner(d, as_singleline(setup_base(options, all)), predict_or_learn, predict_or_learn); ret.set_save_load(save_load); - + ret.label_type = label_type_t::simple; return make_base(ret); } diff --git a/vowpalwabbit/memory_tree.cc b/vowpalwabbit/memory_tree.cc index 5438800d666..2ae53a60d8b 100644 --- a/vowpalwabbit/memory_tree.cc +++ b/vowpalwabbit/memory_tree.cc @@ -44,24 +44,16 @@ void remove_at_index(v_array& array, uint32_t index) return; } -void copy_example_data(example* dst, example* src, bool oas = false) // copy example data. +void copy_example_data(example* dst, example* src) { - if (oas == false) - { - dst->l = src->l; - dst->l.multi.label = src->l.multi.label; - } - else - { - dst->l.multilabels.label_v.delete_v(); - copy_array(dst->l.multilabels.label_v, src->l.multilabels.label_v); - } + dst->l = src->l; + dst->pred = src->pred; VW::copy_example_data(false, dst, src); } inline void free_example(example* ec) { - VW::dealloc_example(nullptr, *ec); + ec->~example(); free(ec); } @@ -72,7 +64,7 @@ void diag_kronecker_prod_fs_test( features& f1, features& f2, features& prod_f, float& total_sum_feat_sq, float norm_sq1, float norm_sq2) { // originally called delete_v, but that doesn't seem right. 
Clearing instead - //prod_f.~features(); + // prod_f.~features(); prod_f.clear(); if (f2.indicies.size() == 0) return; @@ -102,11 +94,11 @@ int cmpfunc(const void* a, const void* b) { return *(char*)a - *(char*)b; } -void diag_kronecker_product_test(example& ec1, example& ec2, example& ec, bool oas = false) +void diag_kronecker_product_test(example& ec1, example& ec2, example& ec) { // copy_example_data(&ec, &ec1, oas); //no_feat false, oas: true - VW::dealloc_example(nullptr, ec, nullptr); // clear ec - copy_example_data(&ec, &ec1, oas); + // VW::dealloc_example(nullptr, ec, nullptr); // clear ec + copy_example_data(&ec, &ec1); ec.total_sum_feat_sq = 0.0; // sort namespaces. pass indices array into sort...template (leave this to the end) @@ -167,7 +159,6 @@ struct node right = 0; nl = 0.001; // initilze to 1, as we need to do nl/nr. nr = 0.001; - examples_index = v_init<uint32_t>(); } }; @@ -219,8 +210,6 @@ struct memory_tree memory_tree() { - nodes = v_init<node>(); - examples = v_init<example*>(); alpha = 0.5; routers_used = 0; iter = 0; @@ -235,10 +224,6 @@ struct memory_tree ~memory_tree() { - for (auto& node : nodes) node.examples_index.delete_v(); - nodes.delete_v(); - for (auto ex : examples) free_example(ex); - examples.delete_v(); if (kprod_ec) free_example(kprod_ec); } @@ -273,12 +258,28 @@ float linear_kernel(const flat_example* fec1, const flat_example* fec2) float normalized_linear_prod(memory_tree& b, example* ec1, example* ec2) { + + auto l1 = std::move(ec1->l); + auto l2 = std::move(ec2->l); + ec1->l.reset(); + ec1->l.init_as_simple(); + ec2->l.reset(); + ec2->l.init_as_simple(); + flat_example* fec1 = flatten_sort_example(*b.all, ec1); flat_example* fec2 = flatten_sort_example(*b.all, ec2); float norm_sqrt = std::pow(fec1->total_sum_feat_sq * fec2->total_sum_feat_sq, 0.5f); float linear_prod = linear_kernel(fec1, fec2); - // fec1->fs.delete_v(); - // fec2->fs.delete_v(); + + // This function can be called with ec1 and ec2 pointing to the same thing. In this case, only restore ec1. + ec1->l.reset(); + ec1->l = std::move(l1); + if (ec1 != ec2) + { + ec2->l.reset(); + ec2->l = std::move(l2); + } + free_flatten_example(fec1); free_flatten_example(fec2); return linear_prod / norm_sqrt; @@ -308,7 +309,7 @@ void init_tree(memory_tree& b) b.total_num_queries = 0; b.max_routers = b.max_nodes; - std::cout << "tree initiazliation is done...." << std::endl + std::cout << "tree initialization is done..." << std::endl << "max nodes " << b.max_nodes << std::endl << "tree size: " << b.nodes.size() << std::endl << "max number of unique labels: " << b.max_num_labels << std::endl @@ -390,42 +391,47 @@ float train_node(memory_tree& b, single_learner& base, example& ec, const uint64 MULTICLASS::label_t mc; uint32_t save_multi_pred = 0; MULTILABEL::labels multilabels; MULTILABEL::labels preds; if (b.oas == false) { - mc = ec.l.multi; - save_multi_pred = ec.pred.multiclass; + mc = ec.l.multi(); + save_multi_pred = ec.pred.multiclass(); } else { - multilabels = ec.l.multilabels; - preds = ec.pred.multilabels; + multilabels = std::move(ec.l.multilabels()); + preds = std::move(ec.pred.multilabels()); } - ec.l.simple = {1.f, 1.f, 0.}; + ec.l.reset(); + ec.l.init_as_simple() = {1.f, 1.f, 0.}; + ec.pred.reset(); + ec.pred.init_as_scalar(); base.predict(ec, b.nodes[cn].base_router); - float prediction = ec.pred.scalar; + float prediction = ec.pred.scalar(); // float imp_weight = 1.f; //no importance weight. float weighted_value = (float)((1. - b.alpha) * log(b.nodes[cn].nl / (b.nodes[cn].nr + 1e-1)) / log(2.) + b.alpha * prediction); float route_label = weighted_value < 0.f ?
-1.f : 1.f; - // ec.l.simple = {route_label, imp_weight, 0.f}; + // ec.l.simple() = {route_label, imp_weight, 0.f}; float ec_input_weight = ec.weight; ec.weight = 1.f; - ec.l.simple = {route_label, 1., 0.f}; + ec.l.simple() = {route_label, 1., 0.f}; base.learn(ec, b.nodes[cn].base_router); // update the router according to the new example. base.predict(ec, b.nodes[cn].base_router); - float save_binary_scalar = ec.pred.scalar; + float save_binary_scalar = ec.pred.scalar(); + ec.l.reset(); + ec.pred.reset(); if (b.oas == false) { - ec.l.multi = mc; - ec.pred.multiclass = save_multi_pred; + ec.l.init_as_multi() = mc; + ec.pred.init_as_multiclass() = save_multi_pred; } else { - ec.pred.multilabels = preds; - ec.l.multilabels = multilabels; + ec.pred.init_as_multilabels() = std::move(preds); + ec.l.init_as_multilabels() = std::move(multilabels); } ec.weight = ec_input_weight; @@ -467,51 +473,60 @@ void split_leaf(memory_tree& b, single_learner& base, const uint64_t cn) for (size_t ec_id = 0; ec_id < b.nodes[cn].examples_index.size(); ec_id++) // scan all examples stored in the cn { uint32_t ec_pos = b.nodes[cn].examples_index[ec_id]; + auto& current_ex = *b.examples[ec_pos]; MULTICLASS::label_t mc; uint32_t save_multi_pred = 0; MULTILABEL::labels multilabels; MULTILABEL::labels preds; if (b.oas == false) { - mc = b.examples[ec_pos]->l.multi; - save_multi_pred = b.examples[ec_pos]->pred.multiclass; + mc = current_ex.l.multi(); + save_multi_pred = current_ex.pred.multiclass(); } else { - multilabels = b.examples[ec_pos]->l.multilabels; - preds = b.examples[ec_pos]->pred.multilabels; + multilabels = std::move(current_ex.l.multilabels()); + preds = std::move(current_ex.pred.multilabels()); } - b.examples[ec_pos]->l.simple = {1.f, 1.f, 0.f}; - base.predict(*b.examples[ec_pos], b.nodes[cn].base_router); // re-predict - float scalar = b.examples[ec_pos]->pred.scalar; // this is spliting the leaf. - if (scalar < 0) + current_ex.l.reset(); + current_ex.l.init_as_simple() = {1.f, 1.f, 0.f}; + current_ex.pred.reset(); + current_ex.pred.init_as_scalar(); + base.predict(current_ex, b.nodes[cn].base_router); // re-predict + float scalar = current_ex.pred.scalar(); // this is splitting the leaf. + + current_ex.l.reset(); + current_ex.pred.reset(); + if (b.oas == false) { - b.nodes[left_child].examples_index.push_back(ec_pos); - float leaf_pred = train_node(b, base, *b.examples[ec_pos], left_child); - insert_descent(b.nodes[left_child], leaf_pred); // fake descent, only for update nl and nr + current_ex.l.init_as_multi() = mc; + current_ex.pred.init_as_multiclass() = save_multi_pred; } else { - b.nodes[right_child].examples_index.push_back(ec_pos); - float leaf_pred = train_node(b, base, *b.examples[ec_pos], right_child); - insert_descent(b.nodes[right_child], leaf_pred); // fake descent.
for update nr and nl + current_ex.pred.init_as_multilabels() = preds; + current_ex.l.init_as_multilabels() = multilabels; } - if (b.oas == false) + if (scalar < 0) { - b.examples[ec_pos]->l.multi = mc; - b.examples[ec_pos]->pred.multiclass = save_multi_pred; + b.nodes[left_child].examples_index.push_back(ec_pos); + float leaf_pred = train_node(b, base, current_ex, left_child); + insert_descent(b.nodes[left_child], leaf_pred); // fake descent, only for update nl and nr } else { - b.examples[ec_pos]->pred.multilabels = preds; - b.examples[ec_pos]->l.multilabels = multilabels; + b.nodes[right_child].examples_index.push_back(ec_pos); + float leaf_pred = train_node(b, base, current_ex, right_child); + insert_descent(b.nodes[right_child], leaf_pred); // fake descent. for update nr and nl } } - b.nodes[cn].examples_index.delete_v(); // empty the cn's example list - b.nodes[cn].nl = std::max(double(b.nodes[left_child].examples_index.size()), 0.001); // avoid to set nl to zero - b.nodes[cn].nr = std::max(double(b.nodes[right_child].examples_index.size()), 0.001); // avoid to set nr to zero + b.nodes[cn].examples_index.clear(); // empty the cn's example list + b.nodes[cn].nl = + std::max(static_cast<double>(b.nodes[left_child].examples_index.size()), 0.001); // avoid to set nl to zero + b.nodes[cn].nr = + std::max(static_cast<double>(b.nodes[right_child].examples_index.size()), 0.001); // avoid to set nr to zero if (std::max(b.nodes[cn].nl, b.nodes[cn].nr) > b.max_ex_in_leaf) { @@ -565,9 +580,9 @@ void collect_labels_from_leaf(memory_tree& b, const uint64_t cn, v_array<uint32_t>& leaf_labs) - for (uint32_t lab : b.examples[loc]->l.multilabels.label_v) + for (uint32_t lab : b.examples[loc]->l.multilabels().label_v) { // scan through each label: - if (v_array_contains(leaf_labs, lab) == false) + if (std::find(leaf_labs.cbegin(), leaf_labs.cend(), lab) == leaf_labs.cend()) leaf_labs.push_back(lab); } } @@ -575,42 +590,42 @@ void train_one_against_some_at_leaf(memory_tree& b, single_learner& base, const { // training one-against-some classifiers at a leaf using the examples stored in the leaf - v_array<uint32_t> leaf_labs = v_init<uint32_t>(); + v_array<uint32_t> leaf_labs; collect_labels_from_leaf(b, cn, leaf_labs); // unique labels from the leaf. - MULTILABEL::labels multilabels = ec.l.multilabels; - MULTILABEL::labels preds = ec.pred.multilabels; - ec.l.simple = {FLT_MAX, 1.f, 0.f}; + MULTILABEL::labels& multilabels = ec.l.multilabels(); + MULTILABEL::labels& preds = ec.pred.multilabels(); + ec.l.simple() = {FLT_MAX, 1.f, 0.f}; for (size_t i = 0; i < leaf_labs.size(); i++) { - ec.l.simple.label = -1.f; - if (v_array_contains(multilabels.label_v, leaf_labs[i])) - ec.l.simple.label = 1.f; + ec.l.simple().label = -1.f; + if (std::find(multilabels.label_v.cbegin(), multilabels.label_v.cend(), leaf_labs[i]) != multilabels.label_v.cend()) + ec.l.simple().label = 1.f; base.learn(ec, b.max_routers + 1 + leaf_labs[i]); } - ec.pred.multilabels = preds; - ec.l.multilabels = multilabels; + ec.pred.multilabels() = preds; + ec.l.multilabels() = multilabels; } inline uint32_t compute_hamming_loss_via_oas( memory_tree& b, single_learner& base, const uint64_t cn, example& ec, v_array<uint32_t>& selected_labs) { - selected_labs.delete_v(); - v_array<uint32_t> leaf_labs = v_init<uint32_t>(); + selected_labs.clear(); + v_array<uint32_t> leaf_labs; collect_labels_from_leaf(b, cn, leaf_labs); // unique labels stored in the leaf.
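One porting pitfall worth flagging in the hunks above: v_array_contains(xs, x) being true corresponds to std::find being *unequal* to cend(), so the condition silently inverts if == is written by mistake. A standalone check of the equivalence (std::vector standing in for v_array):

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <vector>

int main()
{
  std::vector<uint32_t> labels{1, 3, 5};
  // contains(labels, x)  <=>  std::find(...) != labels.cend()
  assert(std::find(labels.cbegin(), labels.cend(), 3u) != labels.cend());  // present
  assert(std::find(labels.cbegin(), labels.cend(), 4u) == labels.cend());  // absent
  return 0;
}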
- MULTILABEL::labels multilabels = ec.l.multilabels; - MULTILABEL::labels preds = ec.pred.multilabels; - ec.l.simple = {FLT_MAX, 1.f, 0.f}; + MULTILABEL::labels& multilabels = ec.l.multilabels(); + MULTILABEL::labels& preds = ec.pred.multilabels(); + ec.l.simple() = {FLT_MAX, 1.f, 0.f}; for (size_t i = 0; i < leaf_labs.size(); i++) { base.predict(ec, b.max_routers + 1 + leaf_labs[i]); - float score = ec.pred.scalar; + float score = ec.pred.scalar(); if (score > 0) selected_labs.push_back(leaf_labs[i]); } - ec.pred.multilabels = preds; - ec.l.multilabels = multilabels; + ec.pred.multilabels() = preds; + ec.l.multilabels() = multilabels; - return hamming_loss(ec.l.multilabels.label_v, selected_labs); + return hamming_loss(ec.l.multilabels().label_v, selected_labs); } // pick up the "closest" example in the leaf using the score function. @@ -630,8 +645,11 @@ int64_t pick_nearest(memory_tree& b, single_learner& base, const uint64_t cn, ex if (b.learn_at_leaf == true && b.current_pass >= 1) { float tmp_s = normalized_linear_prod(b, &ec, b.examples[loc]); - diag_kronecker_product_test(ec, *b.examples[loc], *b.kprod_ec, b.oas); - b.kprod_ec->l.simple = {FLT_MAX, 0., tmp_s}; + diag_kronecker_product_test(ec, *b.examples[loc], *b.kprod_ec); + b.kprod_ec->l.reset(); + b.kprod_ec->l.init_as_simple() = {FLT_MAX, 0., tmp_s}; + b.kprod_ec->pred.reset(); + b.kprod_ec->pred.init_as_scalar(); base.predict(*b.kprod_ec, b.max_routers); score = b.kprod_ec->partial_prediction; } @@ -653,15 +671,15 @@ int64_t pick_nearest(memory_tree& b, single_learner& base, const uint64_t cn, ex // for any two examples, use number of overlap labels to indicate the similarity between these two examples. float get_overlap_from_two_examples(example& ec1, example& ec2) { - return (float)over_lap(ec1.l.multilabels.label_v, ec2.l.multilabels.label_v); + return (float)over_lap(ec1.l.multilabels().label_v, ec2.l.multilabels().label_v); } // we use F1 score as the reward signal float F1_score_for_two_examples(example& ec1, example& ec2) { float num_overlaps = get_overlap_from_two_examples(ec1, ec2); - float v1 = (float)(num_overlaps / (1e-7 + ec1.l.multilabels.label_v.size() * 1.)); - float v2 = (float)(num_overlaps / (1e-7 + ec2.l.multilabels.label_v.size() * 1.)); + float v1 = (float)(num_overlaps / (1e-7 + ec1.l.multilabels().label_v.size() * 1.)); + float v2 = (float)(num_overlaps / (1e-7 + ec2.l.multilabels().label_v.size() * 1.)); if (num_overlaps == 0.f) return 0.f; else @@ -677,33 +695,39 @@ void predict(memory_tree& b, single_learner& base, example& ec) MULTILABEL::labels preds; if (b.oas == false) { - mc = ec.l.multi; - save_multi_pred = ec.pred.multiclass; + mc = ec.l.multi(); + save_multi_pred = ec.pred.multiclass(); } else { - multilabels = ec.l.multilabels; - preds = ec.pred.multilabels; + multilabels = std::move(ec.l.multilabels()); + preds = std::move(ec.pred.multilabels()); } uint64_t cn = 0; - ec.l.simple = {-1.f, 1.f, 0.}; + ec.l.reset(); + ec.l.init_as_simple() = {-1.f, 1.f, 0.}; + ec.pred.reset(); + ec.pred.init_as_scalar(); while (b.nodes[cn].internal == 1) { // if it's internal{ base.predict(ec, b.nodes[cn].base_router); - uint64_t newcn = ec.pred.scalar < 0 ? b.nodes[cn].left : b.nodes[cn].right; // do not need to increment nl and nr. + uint64_t newcn = + ec.pred.scalar() < 0 ? b.nodes[cn].left : b.nodes[cn].right; // do not need to increment nl and nr. 
cn = newcn; } + ec.l.reset(); + ec.pred.reset(); if (b.oas == false) { - ec.l.multi = mc; - ec.pred.multiclass = save_multi_pred; + ec.l.init_as_multi() = mc; + ec.pred.init_as_multiclass() = save_multi_pred; } else { - ec.pred.multilabels = preds; - ec.l.multilabels = multilabels; + ec.pred.init_as_multilabels() = std::move(preds); + ec.l.init_as_multilabels() = std::move(multilabels); } int64_t closest_ec = 0; @@ -711,11 +735,11 @@ void predict(memory_tree& b, single_learner& base, example& ec) { closest_ec = pick_nearest(b, base, cn, ec); if (closest_ec != -1) - ec.pred.multiclass = b.examples[closest_ec]->l.multi.label; + ec.pred.multiclass() = b.examples[closest_ec]->l.multi().label; else - ec.pred.multiclass = 0; + ec.pred.multiclass() = 0; - if (ec.l.multi.label != ec.pred.multiclass) + if (ec.l.multi().label != ec.pred.multiclass()) { ec.loss = ec.weight; b.num_mistakes++; @@ -730,7 +754,7 @@ void predict(memory_tree& b, single_learner& base, example& ec) reward = F1_score_for_two_examples(ec, *b.examples[closest_ec]); b.F1_score += reward; } - v_array selected_labs = v_init(); + v_array selected_labs; ec.loss = (float)compute_hamming_loss_via_oas(b, base, cn, ec, selected_labs); b.hamming_loss += ec.loss; } @@ -745,31 +769,35 @@ float return_reward_from_node(memory_tree& b, single_learner& base, uint64_t cn, MULTILABEL::labels preds; if (b.oas == false) { - mc = ec.l.multi; - save_multi_pred = ec.pred.multiclass; + mc = ec.l.multi(); + save_multi_pred = ec.pred.multiclass(); } else { - multilabels = ec.l.multilabels; - preds = ec.pred.multilabels; + multilabels = std::move(ec.l.multilabels()); + preds = std::move(ec.pred.multilabels()); } - ec.l.simple = {FLT_MAX, 1., 0.0}; + ec.l.reset(); + ec.l.init_as_simple() = {FLT_MAX, 1., 0.0}; + ec.pred.reset(); + ec.pred.init_as_scalar(); while (b.nodes[cn].internal != -1) { base.predict(ec, b.nodes[cn].base_router); - float prediction = ec.pred.scalar; + float prediction = ec.pred.scalar(); cn = prediction < 0 ? b.nodes[cn].left : b.nodes[cn].right; } - + ec.l.reset(); + ec.pred.reset(); if (b.oas == false) { - ec.l.multi = mc; - ec.pred.multiclass = save_multi_pred; + ec.l.init_as_multi() = mc; + ec.pred.init_as_multiclass() = save_multi_pred; } else { - ec.pred.multilabels = preds; - ec.l.multilabels = multilabels; + ec.pred.init_as_multilabels() = preds; + ec.l.init_as_multilabels() = multilabels; } // get to leaf now: @@ -778,7 +806,7 @@ float return_reward_from_node(memory_tree& b, single_learner& base, uint64_t cn, closest_ec = pick_nearest(b, base, cn, ec); // no randomness for picking example. 
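The predict path above follows the same choreography as train_node and split_leaf: move the expensive multiclass/multilabel state out of the example, re-initialize the label and prediction as simple/scalar for the base reductions, then move the original state back. A compact sketch of that round trip, using std::variant as an assumed stand-in for polylabel (C++17; not VW's real type):

#include <cstdint>
#include <utility>
#include <variant>
#include <vector>

// monostate ~ unset, float ~ simple label, vector ~ multilabels.
using toy_label = std::variant<std::monostate, float, std::vector<uint32_t>>;

void base_predict(toy_label& l) { std::get<float>(l) = 0.25f; }  // pretend base learner

int main()
{
  toy_label l = std::vector<uint32_t>{2, 7};

  // 1. save: move the heap-backed member out so nothing is copied
  auto saved = std::move(std::get<std::vector<uint32_t>>(l));

  // 2. re-type as "simple" while the base reduction runs
  l = 0.f;
  base_predict(l);

  // 3. restore the original type and contents
  l = std::move(saved);
  return 0;
}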
if (b.oas == false) { - if ((closest_ec != -1) && (b.examples[closest_ec]->l.multi.label == ec.l.multi.label)) + if ((closest_ec != -1) && (b.examples[closest_ec]->l.multi().label == ec.l.multi().label)) reward = 1.f; } else @@ -791,8 +819,11 @@ float return_reward_from_node(memory_tree& b, single_learner& base, uint64_t cn, if (b.learn_at_leaf == true && closest_ec != -1) { float score = normalized_linear_prod(b, &ec, b.examples[closest_ec]); - diag_kronecker_product_test(ec, *b.examples[closest_ec], *b.kprod_ec, b.oas); - b.kprod_ec->l.simple = {reward, 1.f, -score}; + diag_kronecker_product_test(ec, *b.examples[closest_ec], *b.kprod_ec); + b.kprod_ec->l.reset(); + b.kprod_ec->l.init_as_simple() = {reward, 1.f, -score}; + b.kprod_ec->pred.reset(); + b.kprod_ec->pred.init_as_scalar(); b.kprod_ec->weight = weight; base.learn(*b.kprod_ec, b.max_routers); } @@ -816,11 +847,14 @@ void learn_at_leaf_random( } if (ec_id != -1) { - if (b.examples[ec_id]->l.multi.label == ec.l.multi.label) + if (b.examples[ec_id]->l.multi().label == ec.l.multi().label) reward = 1.f; float score = normalized_linear_prod(b, &ec, b.examples[ec_id]); - diag_kronecker_product_test(ec, *b.examples[ec_id], *b.kprod_ec, b.oas); - b.kprod_ec->l.simple = {reward, 1.f, -score}; + diag_kronecker_product_test(ec, *b.examples[ec_id], *b.kprod_ec); + b.kprod_ec->l.reset(); + b.kprod_ec->l.init_as_simple() = {reward, 1.f, -score}; + b.kprod_ec->pred.reset(); + b.kprod_ec->pred.init_as_scalar(); b.kprod_ec->weight = weight; //* b.nodes[leaf_id].examples_index.size(); base.learn(*b.kprod_ec, b.max_routers); } @@ -838,22 +872,25 @@ void route_to_leaf(memory_tree& b, single_learner& base, const uint32_t& ec_arra MULTILABEL::labels preds; if (b.oas == false) { - mc = ec.l.multi; - save_multi_pred = ec.pred.multiclass; + mc = ec.l.multi(); + save_multi_pred = ec.pred.multiclass(); } else { - multilabels = ec.l.multilabels; - preds = ec.pred.multilabels; + multilabels = std::move(ec.l.multilabels()); + preds = std::move(ec.pred.multilabels()); } path.clear(); - ec.l.simple = {FLT_MAX, 1.0, 0.0}; + ec.l.reset(); + ec.l.init_as_simple() = {FLT_MAX, 1.0, 0.0}; + ec.pred.reset(); + ec.pred.init_as_scalar(); while (b.nodes[cn].internal != -1) { path.push_back(cn); // path stores node id from the root to the leaf base.predict(ec, b.nodes[cn].base_router); - float prediction = ec.pred.scalar; + float prediction = ec.pred.scalar(); if (insertion == false) cn = prediction < 0 ? b.nodes[cn].left : b.nodes[cn].right; else @@ -861,15 +898,17 @@ void route_to_leaf(memory_tree& b, single_learner& base, const uint32_t& ec_arra } path.push_back(cn); // push back the leaf + ec.l.reset(); + ec.pred.reset(); if (b.oas == false) { - ec.l.multi = mc; - ec.pred.multiclass = save_multi_pred; + ec.l.init_as_multi() = mc; + ec.pred.init_as_multiclass() = save_multi_pred; } else { - ec.pred.multilabels = preds; - ec.l.multilabels = multilabels; + ec.pred.init_as_multilabels() = std::move(preds); + ec.l.init_as_multilabels() = std::move(multilabels); } // std::cout<<"at route to leaf: "< path_to_leaf = v_init(); + v_array path_to_leaf; route_to_leaf(b, base, ec_array_index, 0, path_to_leaf, false); // no insertion happens here. 
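For the oas branch, the reward above comes from F1_score_for_two_examples; its return statement falls outside the hunk, but from the v1/v2 terms it is presumably the harmonic mean 2*v1*v2/(v1+v2). Restated as a self-contained function (sorted label vectors assumed, std::vector standing in for v_array):

#include <algorithm>
#include <cstdint>
#include <iterator>
#include <vector>

float f1_reward(const std::vector<uint32_t>& a, const std::vector<uint32_t>& b)
{
  // overlap = size of the intersection of the two sorted label sets
  std::vector<uint32_t> common;
  std::set_intersection(a.cbegin(), a.cend(), b.cbegin(), b.cend(), std::back_inserter(common));
  const float num_overlaps = static_cast<float>(common.size());
  if (num_overlaps == 0.f) return 0.f;
  const float v1 = num_overlaps / (1e-7f + a.size());  // precision-like term
  const float v2 = num_overlaps / (1e-7f + b.size());  // recall-like term
  return 2.f * v1 * v2 / (v1 + v2);                    // harmonic mean, in [0, 1]
}

int main() { return f1_reward({1, 2}, {2, 3}) > 0.f ? 0 : 1; }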
if (path_to_leaf.size() > 1) @@ -915,14 +954,18 @@ void single_query_and_learn(memory_tree& b, single_learner& base, const uint32_t float ec_input_weight = ec.weight; MULTICLASS::label_t mc; + uint32_t save_multi_pred = 0; MULTILABEL::labels multilabels; MULTILABEL::labels preds; if (b.oas == false) - mc = ec.l.multi; + { + mc = ec.l.multi(); + save_multi_pred = ec.pred.multiclass(); + } else { - multilabels = ec.l.multilabels; - preds = ec.pred.multilabels; + multilabels = std::move(ec.l.multilabels()); + preds = std::move(ec.pred.multilabels()); } ec.weight = fabs(objective); @@ -930,15 +973,23 @@ void single_query_and_learn(memory_tree& b, single_learner& base, const uint32_t ec.weight = 100.f; else if (ec.weight < .01f) ec.weight = 0.01f; - ec.l.simple = {objective < 0. ? -1.f : 1.f, 1.f, 0.}; + ec.l.reset(); + ec.l.init_as_simple() = {objective < 0. ? -1.f : 1.f, 1.f, 0.}; + ec.pred.reset(); + ec.pred.init_as_scalar(); base.learn(ec, b.nodes[cn].base_router); + ec.l.reset(); + ec.pred.reset(); if (b.oas == false) - ec.l.multi = mc; + { + ec.l.init_as_multi() = mc; + ec.pred.init_as_multiclass() = save_multi_pred; + } else { - ec.pred.multilabels = preds; - ec.l.multilabels = multilabels; + ec.pred.init_as_multilabels() = std::move(preds); + ec.l.init_as_multilabels() = std::move(multilabels); } ec.weight = ec_input_weight; // restore the original weight } @@ -953,7 +1004,6 @@ void single_query_and_learn(memory_tree& b, single_learner& base, const uint32_t train_one_against_some_at_leaf(b, base, cn, ec); } } - path_to_leaf.delete_v(); } // using reward signals @@ -1009,9 +1059,8 @@ void experience_replay(memory_tree& b, single_learner& base) { if (b.dream_at_update == false) { - v_array tmp_path = v_init(); + v_array tmp_path; route_to_leaf(b, base, ec_id, 0, tmp_path, true); - tmp_path.delete_v(); } else { @@ -1045,7 +1094,7 @@ void learn(memory_tree& b, single_learner& base, example& ec) if (b.current_pass < 1) { // in the first pass, we need to store the memory: example* new_ec = &calloc_or_throw(); - copy_example_data(new_ec, &ec, b.oas); + copy_example_data(new_ec, &ec); b.examples.push_back(new_ec); if (b.online == true) update_rew(b, base, (uint32_t)(b.examples.size() - 1), *b.examples[b.examples.size() - 1]); // query and learn @@ -1097,18 +1146,18 @@ void save_load_example(example* ec, io_buf& model_file, bool& read, bool& text, writeit(ec->ft_offset, "ft_offset"); if (oas == false) { // multi-class - writeit(ec->l.multi.label, "multiclass_label"); - writeit(ec->l.multi.weight, "multiclass_weight"); + writeit(ec->l.multi().label, "multiclass_label"); + writeit(ec->l.multi().weight, "multiclass_weight"); } else { // multi-label - writeitvar(ec->l.multilabels.label_v.size(), "label_size", label_size); + writeitvar(ec->l.multilabels().label_v.size(), "label_size", label_size); if (read) { - ec->l.multilabels.label_v.clear(); - for (uint32_t i = 0; i < label_size; i++) ec->l.multilabels.label_v.push_back(0); + ec->l.multilabels().label_v.clear(); + for (uint32_t i = 0; i < label_size; i++) ec->l.multilabels().label_v.push_back(0); } - for (uint32_t i = 0; i < label_size; i++) writeit(ec->l.multilabels.label_v[i], "ec_label"); + for (uint32_t i = 0; i < label_size; i++) writeit(ec->l.multilabels().label_v[i], "ec_label"); } writeitvar(ec->tag.size(), "tags", tag_number); @@ -1123,7 +1172,7 @@ void save_load_example(example* ec, io_buf& model_file, bool& read, bool& text, writeitvar(ec->indices.size(), "namespaces", namespace_size); if (read) { - ec->indices.delete_v(); + 
ec->indices.clear(); for (uint32_t i = 0; i < namespace_size; i++) { ec->indices.push_back('\0'); @@ -1139,8 +1188,6 @@ void save_load_example(example* ec, io_buf& model_file, bool& read, bool& text, if (read) { fs->clear(); - fs->values = v_init(); - fs->indicies = v_init(); for (uint32_t f_i = 0; f_i < feat_size; f_i++) { fs->push_back(0, 0); @@ -1249,7 +1296,7 @@ base_learner* memory_tree_setup(options_i& options, vw& all) .help("number of dream operations per example (default = 1)")) .add(make_option("top_K", tree->top_K).default_value(1).help("top K prediction error (default 1)")) .add(make_option("learn_at_leaf", tree->learn_at_leaf).help("whether or not learn at leaf (defualt = True)")) - .add(make_option("oas", tree->oas).help("use oas at the leaf")) + .add(make_option("oas", tree->oas).help("use oas (one against some) at the leaf")) .add(make_option("dream_at_update", tree->dream_at_update) .default_value(0) .help("turn on dream operations at reward based update as well")) @@ -1289,6 +1336,7 @@ base_learner* memory_tree_setup(options_i& options, vw& all) // srand(time(0)); l.set_save_load(save_load_memory_tree); l.set_end_pass(end_pass); + l.label_type = label_type_t::multi; return make_base(l); } // multi-label classification @@ -1299,16 +1347,13 @@ base_learner* memory_tree_setup(options_i& options, vw& all) tree, as_singleline(setup_base(options, all)), learn, predict, num_learners, prediction_type_t::multilabels); // all.p->lp = MULTILABEL::multilabel; - // all.label_type = label_type_t::multi; - // all.delete_prediction = MULTILABEL::multilabel.delete_label; // srand(time(0)); l.set_end_pass(end_pass); l.set_save_load(save_load_memory_tree); // l.set_end_pass(end_pass); all.p->lp = MULTILABEL::multilabel; - all.label_type = label_type_t::multi; - all.delete_prediction = MULTILABEL::multilabel.delete_label; + l.label_type = label_type_t::multi; return make_base(l); } diff --git a/vowpalwabbit/mf.cc b/vowpalwabbit/mf.cc index 4d84de7e3a5..5fb2bdc34dc 100644 --- a/vowpalwabbit/mf.cc +++ b/vowpalwabbit/mf.cc @@ -37,13 +37,6 @@ struct mf features temp_features; vw* all; // for pairs? 
and finalize - - ~mf() - { - // clean up local v_arrays - indices.delete_v(); - sub_predictions.delete_v(); - } }; template @@ -105,18 +98,18 @@ void predict(mf& data, single_learner& base, example& ec) // finalize prediction ec.partial_prediction = prediction; - ec.pred.scalar = GD::finalize_prediction(data.all->sd, ec.partial_prediction); + ec.pred.scalar() = GD::finalize_prediction(data.all->sd, ec.partial_prediction); } void learn(mf& data, single_learner& base, example& ec) { // predict with current weights predict(data, base, ec); - float predicted = ec.pred.scalar; + float predicted = ec.pred.scalar(); // update linear weights base.update(ec); - ec.pred.scalar = ec.updated_prediction; + ec.pred.scalar() = ec.updated_prediction; // store namespace indices copy_array(data.indices, ec.indices); @@ -138,7 +131,7 @@ void learn(mf& data, single_learner& base, example& ec) ec.indices[0] = left_ns; // store feature values in left namespace - data.temp_features.deep_copy_from(ec.feature_space[left_ns]); + data.temp_features = ec.feature_space[left_ns]; for (size_t k = 1; k <= data.rank; k++) { @@ -150,19 +143,19 @@ void learn(mf& data, single_learner& base, example& ec) base.update(ec, k); // restore left namespace features (undoing multiply) - fs.deep_copy_from(data.temp_features); + fs = data.temp_features; // compute new l_k * x_l scaling factors // base.predict(ec, k); // data.sub_predictions[2*k-1] = ec.partial_prediction; - // ec.pred.scalar = ec.updated_prediction; + // ec.pred.scalar() = ec.updated_prediction; } // set example to right namespace only ec.indices[0] = right_ns; // store feature values for right namespace - data.temp_features.deep_copy_from(ec.feature_space[right_ns]); + data.temp_features = ec.feature_space[right_ns]; for (size_t k = 1; k <= data.rank; k++) { @@ -172,18 +165,18 @@ void learn(mf& data, single_learner& base, example& ec) // update r^k using base learner base.update(ec, k + data.rank); - ec.pred.scalar = ec.updated_prediction; + ec.pred.scalar() = ec.updated_prediction; // restore right namespace features - fs.deep_copy_from(data.temp_features); + fs = data.temp_features; } } } // restore namespace indices - copy_array(ec.indices, data.indices); + ec.indices = data.indices; // restore original prediction - ec.pred.scalar = predicted; + ec.pred.scalar() = predicted; } void finish(mf& o) @@ -210,8 +203,11 @@ base_learner* mf_setup(options_i& options, vw& all) all.random_positive_weights = true; + auto base = as_singleline(setup_base(options, all)); learner& l = - init_learner(data, as_singleline(setup_base(options, all)), learn, predict, 2 * data->rank + 1); + init_learner(data, base, learn, predict, 2 * data->rank + 1); l.set_finish(finish); + l.label_type = base->label_type; + return make_base(l); } diff --git a/vowpalwabbit/multiclass.cc b/vowpalwabbit/multiclass.cc index d31d4ddc931..6f8e6c45e1e 100644 --- a/vowpalwabbit/multiclass.cc +++ b/vowpalwabbit/multiclass.cc @@ -11,20 +11,20 @@ namespace MULTICLASS { -char* bufread_label(label_t* ld, char* c) +char* bufread_label(label_t& ld, char* c) { - memcpy(&ld->label, c, sizeof(ld->label)); - c += sizeof(ld->label); - memcpy(&ld->weight, c, sizeof(ld->weight)); - c += sizeof(ld->weight); + memcpy(&ld.label, c, sizeof(ld.label)); + c += sizeof(ld.label); + memcpy(&ld.weight, c, sizeof(ld.weight)); + c += sizeof(ld.weight); return c; } -size_t read_cached_label(shared_data*, void* v, io_buf& cache) +size_t read_cached_label(shared_data*, polylabel& v, io_buf& cache) { - label_t* ld = (label_t*)v; + auto& ld 
= v.multi(); char* c; - size_t total = sizeof(ld->label) + sizeof(ld->weight); + size_t total = sizeof(ld.label) + sizeof(ld.weight); if (cache.buf_read(c, total) < total) return 0; bufread_label(ld, c); @@ -32,75 +32,77 @@ size_t read_cached_label(shared_data*, void* v, io_buf& cache) return total; } -float weight(void* v) +float weight(polylabel& v) { - label_t* ld = (label_t*)v; - return (ld->weight > 0) ? ld->weight : 0.f; + auto& ld = v.multi(); + return (ld.weight > 0) ? ld.weight : 0.f; } -char* bufcache_label(label_t* ld, char* c) +char* bufcache_label(label_t& ld, char* c) { - memcpy(c, &ld->label, sizeof(ld->label)); - c += sizeof(ld->label); - memcpy(c, &ld->weight, sizeof(ld->weight)); - c += sizeof(ld->weight); + memcpy(c, &ld.label, sizeof(ld.label)); + c += sizeof(ld.label); + memcpy(c, &ld.weight, sizeof(ld.weight)); + c += sizeof(ld.weight); return c; } -void cache_label(void* v, io_buf& cache) +void cache_label(polylabel& v, io_buf& cache) { char* c; - label_t* ld = (label_t*)v; - cache.buf_write(c, sizeof(ld->label) + sizeof(ld->weight)); + auto& ld = v.multi(); + cache.buf_write(c, sizeof(ld.label) + sizeof(ld.weight)); bufcache_label(ld, c); } -void default_label(void* v) +void default_label(polylabel& v) { - label_t* ld = (label_t*)v; - ld->label = (uint32_t)-1; - ld->weight = 1.; + if (v.get_type() != label_type_t::multi) + { + v.reset(); + v.init_as_multi(); + } + auto& ld = v.multi(); + ld.label = (uint32_t)-1; + ld.weight = 1.; } -bool test_label(void* v) +bool test_label(polylabel& v) { - label_t* ld = (label_t*)v; - return ld->label == (uint32_t)-1; + auto& ld = v.multi(); + return ld.label == (uint32_t)-1; } -void delete_label(void*) {} - -void parse_label(parser*, shared_data* sd, void* v, v_array& words) +void parse_label(parser*, shared_data* sd, polylabel& v, v_array& words) { - label_t* ld = (label_t*)v; + auto& ld = v.multi(); switch (words.size()) { case 0: break; case 1: - ld->label = sd->ldict ? (uint32_t)sd->ldict->get(words[0]) : int_of_string(words[0]); - ld->weight = 1.0; + ld.label = sd->ldict ? (uint32_t)sd->ldict->get(words[0]) : int_of_string(words[0]); + ld.weight = 1.0; break; case 2: - ld->label = sd->ldict ? (uint32_t)sd->ldict->get(words[0]) : int_of_string(words[0]); - ld->weight = float_of_string(words[1]); + ld.label = sd->ldict ? (uint32_t)sd->ldict->get(words[0]) : int_of_string(words[0]); + ld.weight = float_of_string(words[1]); break; default: std::cerr << "malformed example!\n"; std::cerr << "words.size() = " << words.size() << std::endl; } - if (ld->label == 0) + if (ld.label == 0) THROW("label 0 is not allowed for multiclass. Valid labels are {1,k}" << (sd->ldict ? "\nthis likely happened because you specified an invalid label with named labels" : "")); } -label_parser mc_label = {default_label, parse_label, cache_label, read_cached_label, delete_label, weight, nullptr, - test_label, sizeof(label_t)}; +label_parser mc_label = {default_label, parse_label, cache_label, read_cached_label, polylabel_delete_label, weight, polylabel_copy_label, test_label, sizeof(label_t)}; void print_label_pred(vw& all, example& ec, uint32_t prediction) { - VW::string_view sv_label = all.sd->ldict->get(ec.l.multi.label); + VW::string_view sv_label = all.sd->ldict->get(ec.l.multi().label); VW::string_view sv_pred = all.sd->ldict->get(prediction); all.sd->print_update(all.holdout_set_off, all.current_pass, sv_label.empty() ? 
"unknown" : sv_label.to_string(), @@ -112,10 +114,10 @@ void print_probability(vw& all, example& ec, uint32_t prediction) { std::stringstream pred_ss; pred_ss << prediction << "(" << std::setw(2) << std::setprecision(0) << std::fixed - << 100 * ec.pred.scalars[prediction - 1] << "%)"; + << 100 * ec.pred.scalars()[prediction - 1] << "%)"; std::stringstream label_ss; - label_ss << ec.l.multi.label; + label_ss << ec.l.multi().label; all.sd->print_update(all.holdout_set_off, all.current_pass, label_ss.str(), pred_ss.str(), ec.num_features, all.progress_add, all.progress_arg); @@ -127,7 +129,7 @@ void print_score(vw& all, example& ec, uint32_t prediction) pred_ss << prediction; std::stringstream label_ss; - label_ss << ec.l.multi.label; + label_ss << ec.l.multi().label; all.sd->print_update(all.holdout_set_off, all.current_pass, label_ss.str(), pred_ss.str(), ec.num_features, all.progress_add, all.progress_arg); @@ -135,7 +137,7 @@ void print_score(vw& all, example& ec, uint32_t prediction) void direct_print_update(vw& all, example& ec, uint32_t prediction) { - all.sd->print_update(all.holdout_set_off, all.current_pass, ec.l.multi.label, prediction, ec.num_features, + all.sd->print_update(all.holdout_set_off, all.current_pass, ec.l.multi().label, prediction, ec.num_features, all.progress_add, all.progress_arg); } @@ -147,7 +149,7 @@ void print_update(vw& all, example& ec, uint32_t prediction) if (!all.sd->ldict) T(all, ec, prediction); else - print_label_pred(all, ec, ec.pred.multiclass); + print_label_pred(all, ec, ec.pred.multiclass()); } } @@ -160,21 +162,21 @@ void print_update_with_score(vw& all, example& ec, uint32_t pred) { print_update void finish_example(vw& all, example& ec, bool update_loss) { float loss = 0; - if (ec.l.multi.label != (uint32_t)ec.pred.multiclass && ec.l.multi.label != (uint32_t)-1) + if (ec.l.multi().label != (uint32_t)ec.pred.multiclass() && ec.l.multi().label != (uint32_t)-1) loss = ec.weight; - all.sd->update(ec.test_only, update_loss && (ec.l.multi.label != (uint32_t)-1), loss, ec.weight, ec.num_features); + all.sd->update(ec.test_only, update_loss && (ec.l.multi().label != (uint32_t)-1), loss, ec.weight, ec.num_features); for (int sink : all.final_prediction_sink) if (!all.sd->ldict) - all.print_by_ref(sink, (float)ec.pred.multiclass, 0, ec.tag); + all.print_by_ref(sink, (float)ec.pred.multiclass(), 0, ec.tag); else { - VW::string_view sv_pred = all.sd->ldict->get(ec.pred.multiclass); + VW::string_view sv_pred = all.sd->ldict->get(ec.pred.multiclass()); all.print_text_by_ref(sink, sv_pred.to_string(), ec.tag); } - MULTICLASS::print_update(all, ec, ec.pred.multiclass); + MULTICLASS::print_update(all, ec, ec.pred.multiclass()); VW::finish_example(all, ec); } } // namespace MULTICLASS diff --git a/vowpalwabbit/multilabel.cc b/vowpalwabbit/multilabel.cc index e573458a3d6..214af61d536 100644 --- a/vowpalwabbit/multilabel.cc +++ b/vowpalwabbit/multilabel.cc @@ -8,10 +8,10 @@ namespace MULTILABEL { -char* bufread_label(labels* ld, char* c, io_buf& cache) +char* bufread_label(labels& ld, char* c, io_buf& cache) { size_t num = *(size_t*)c; - ld->label_v.clear(); + ld.label_v.clear(); c += sizeof(size_t); size_t total = sizeof(uint32_t) * num; if (cache.buf_read(c, (int)total) < total) @@ -23,16 +23,16 @@ char* bufread_label(labels* ld, char* c, io_buf& cache) { uint32_t temp = *(uint32_t*)c; c += sizeof(uint32_t); - ld->label_v.push_back(temp); + ld.label_v.push_back(temp); } return c; } -size_t read_cached_label(shared_data*, void* v, io_buf& cache) +size_t 
read_cached_label(shared_data*, polylabel& v, io_buf& cache) { - labels* ld = (labels*)v; - ld->label_v.clear(); + auto& ld = v.multilabels(); + ld.label_v.clear(); char* c; size_t total = sizeof(size_t); if (cache.buf_read(c, (int)total) < total) @@ -42,62 +42,50 @@ size_t read_cached_label(shared_data*, void* v, io_buf& cache) return total; } -float weight(void*) { return 1.; } +float weight(polylabel&) { return 1.; } -char* bufcache_label(labels* ld, char* c) +char* bufcache_label(labels& ld, char* c) { - *(size_t*)c = ld->label_v.size(); + *(size_t*)c = ld.label_v.size(); c += sizeof(size_t); - for (unsigned int i = 0; i < ld->label_v.size(); i++) + for (unsigned int i = 0; i < ld.label_v.size(); i++) { - *(uint32_t*)c = ld->label_v[i]; + *(uint32_t*)c = ld.label_v[i]; c += sizeof(uint32_t); } return c; } -void cache_label(void* v, io_buf& cache) +void cache_label(polylabel& v, io_buf& cache) { char* c; - labels* ld = (labels*)v; - cache.buf_write(c, sizeof(size_t) + sizeof(uint32_t) * ld->label_v.size()); + auto& ld = v.multilabels(); + cache.buf_write(c, sizeof(size_t) + sizeof(uint32_t) * ld.label_v.size()); bufcache_label(ld, c); } -void default_label(void* v) +void default_label(polylabel& v) { - labels* ld = (labels*)v; - ld->label_v.clear(); -} - -bool test_label(void* v) -{ - labels* ld = (labels*)v; - return ld->label_v.size() == 0; -} - -void delete_label(void* v) -{ - labels* ld = (labels*)v; - if (ld) - ld->label_v.delete_v(); + if (v.get_type() != label_type_t::multilabels) + { + v.reset(); + v.init_as_multilabels(); + } + auto& ld = v.multilabels(); + ld.label_v.clear(); } -void copy_label(void* dst, void* src) +bool test_label(polylabel& v) { - if (dst && src) - { - labels* ldD = (labels*)dst; - labels* ldS = (labels*)src; - copy_array(ldD->label_v, ldS->label_v); - } + auto& ld = v.multilabels(); + return ld.label_v.size() == 0; } -void parse_label(parser* p, shared_data*, void* v, v_array& words) +void parse_label(parser* p, shared_data*, polylabel& v, v_array& words) { - labels* ld = (labels*)v; + auto& ld = v.multilabels(); - ld->label_v.clear(); + ld.label_v.clear(); switch (words.size()) { case 0: @@ -108,7 +96,7 @@ void parse_label(parser* p, shared_data*, void* v, v_array& wor for (const auto & parse_name : p->parse_name) { uint32_t n = int_of_string(parse_name); - ld->label_v.push_back(n); + ld.label_v.push_back(n); } break; default: @@ -118,8 +106,7 @@ void parse_label(parser* p, shared_data*, void* v, v_array& wor } } -label_parser multilabel = {default_label, parse_label, cache_label, read_cached_label, delete_label, weight, copy_label, - test_label, sizeof(labels)}; +label_parser multilabel = {default_label, parse_label, cache_label, read_cached_label, polylabel_delete_label, weight, polylabel_copy_label, test_label, sizeof(labels)}; void print_update(vw& all, bool is_test, example& ec) { @@ -129,11 +116,11 @@ void print_update(vw& all, bool is_test, example& ec) if (is_test) label_string << " unknown"; else - for (size_t i = 0; i < ec.l.multilabels.label_v.size(); i++) label_string << " " << ec.l.multilabels.label_v[i]; + for (size_t i = 0; i < ec.l.multilabels().label_v.size(); i++) label_string << " " << ec.l.multilabels().label_v[i]; std::stringstream pred_string; - for (size_t i = 0; i < ec.pred.multilabels.label_v.size(); i++) - pred_string << " " << ec.pred.multilabels.label_v[i]; + for (size_t i = 0; i < ec.pred.multilabels().label_v.size(); i++) + pred_string << " " << ec.pred.multilabels().label_v[i]; all.sd->print_update(all.holdout_set_off, 
all.current_pass, label_string.str(), pred_string.str(), ec.num_features, all.progress_add, all.progress_arg); @@ -142,14 +129,12 @@ void print_update(vw& all, bool is_test, example& ec) void output_example(vw& all, example& ec) { - labels& ld = ec.l.multilabels; - float loss = 0.; - if (!test_label(&ld)) + if (!test_label(ec.l)) { // need to compute exact loss - labels preds = ec.pred.multilabels; - labels given = ec.l.multilabels; + labels& preds = ec.pred.multilabels(); + labels& given = ec.l.multilabels(); uint32_t preds_index = 0; uint32_t given_index = 0; @@ -176,23 +161,23 @@ void output_example(vw& all, example& ec) loss += preds.label_v.size() - preds_index; } - all.sd->update(ec.test_only, !test_label(&ld), loss, 1.f, ec.num_features); + all.sd->update(ec.test_only, !test_label(ec.l), loss, 1.f, ec.num_features); for (int sink : all.final_prediction_sink) if (sink >= 0) { std::stringstream ss; - for (size_t i = 0; i < ec.pred.multilabels.label_v.size(); i++) + for (size_t i = 0; i < ec.pred.multilabels().label_v.size(); i++) { if (i > 0) ss << ','; - ss << ec.pred.multilabels.label_v[i]; + ss << ec.pred.multilabels().label_v[i]; } ss << ' '; all.print_text_by_ref(sink, ss.str(), ec.tag); } - print_update(all, test_label(&ec.l.multilabels), ec); + print_update(all, test_label(ec.l), ec); } } // namespace MULTILABEL diff --git a/vowpalwabbit/multilabel_oaa.cc b/vowpalwabbit/multilabel_oaa.cc index 04bcafec7fc..20a8474df0b 100644 --- a/vowpalwabbit/multilabel_oaa.cc +++ b/vowpalwabbit/multilabel_oaa.cc @@ -16,35 +16,41 @@ struct multi_oaa template void predict_or_learn(multi_oaa& o, LEARNER::single_learner& base, example& ec) { - MULTILABEL::labels multilabels = ec.l.multilabels; - MULTILABEL::labels preds = ec.pred.multilabels; + MULTILABEL::labels multilabels = std::move(ec.l.multilabels()); + MULTILABEL::labels preds = std::move(ec.pred.multilabels()); preds.label_v.clear(); - ec.l.simple = {FLT_MAX, 1.f, 0.f}; + ec.l.reset(); + ec.l.init_as_simple(FLT_MAX, 1.f, 0.f); + ec.pred.reset(); + ec.pred.init_as_scalar(); + uint32_t multilabel_index = 0; for (uint32_t i = 0; i < o.k; i++) { if (is_learn) { - ec.l.simple.label = -1.f; + ec.l.simple().label = -1.f; if (multilabels.label_v.size() > multilabel_index && multilabels.label_v[multilabel_index] == i) { - ec.l.simple.label = 1.f; + ec.l.simple().label = 1.f; multilabel_index++; } base.learn(ec, i); } else base.predict(ec, i); - if (ec.pred.scalar > 0.) + if (ec.pred.scalar() > 0.) preds.label_v.push_back(i); } if (is_learn && multilabel_index < multilabels.label_v.size()) std::cout << "label " << multilabels.label_v[multilabel_index] << " is not in {0," << o.k - 1 << "} This won't work right." 
<< std::endl; - ec.pred.multilabels = preds; - ec.l.multilabels = multilabels; + ec.pred.reset(); + ec.pred.init_as_multilabels() = std::move(preds); + ec.l.reset(); + ec.l.init_as_multilabels() = std::move(multilabels); } void finish_example(vw& all, multi_oaa&, example& ec) @@ -67,8 +73,6 @@ LEARNER::base_learner* multilabel_oaa_setup(options_i& options, vw& all) predict_or_learn, predict_or_learn, data->k, prediction_type_t::multilabels); l.set_finish_example(finish_example); all.p->lp = MULTILABEL::multilabel; - all.label_type = label_type_t::multi; - all.delete_prediction = MULTILABEL::multilabel.delete_label; - + l.label_type = label_type_t::multilabels; return make_base(l); } diff --git a/vowpalwabbit/mwt.cc b/vowpalwabbit/mwt.cc index 5f63f925332..12eed319584 100644 --- a/vowpalwabbit/mwt.cc +++ b/vowpalwabbit/mwt.cc @@ -33,13 +33,6 @@ struct mwt v_array indices; // excluded namespaces features feature_space[256]; vw* all; - - ~mwt() - { - evals.delete_v(); - policies.delete_v(); - indices.delete_v(); - } }; inline bool observed_cost(CB::cb_class* cl) @@ -79,7 +72,7 @@ void value_policy(mwt& c, float val, uint64_t index) // estimate the value of a template void predict_or_learn(mwt& c, single_learner& base, example& ec) { - c.observation = get_observed_cost(ec.l.cb); + c.observation = get_observed_cost(ec.l.cb()); if (c.observation != nullptr) { @@ -117,7 +110,11 @@ void predict_or_learn(mwt& c, single_learner& base, example& ec) } // modify the predictions to use a vector with a score for each evaluated feature. - v_array preds = ec.pred.scalars; + v_array preds = std::move(ec.pred.scalars()); + + // TODO Confirm that this type is correct + ec.pred.reset(); + ec.pred.init_as_multiclass(); if (learn) { @@ -137,10 +134,11 @@ void predict_or_learn(mwt& c, single_learner& base, example& ec) // modify the predictions to use a vector with a score for each evaluated feature. 
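Stepping back to the multilabel_oaa hunk above: the reduction visits classes 0..k-1, presents each as a +/-1 binary problem, and relies on the example's multilabel vector being sorted so a single index can track membership. A schematic of that loop with a stand-in binary learner (the learner and all names are illustrative, not VW's API):

#include <cstdint>
#include <vector>

struct toy_binary_learner
{
  std::vector<float> w;  // one score per class
  void learn(uint32_t i, float label) { w[i] += 0.1f * label; }
  float predict(uint32_t i) const { return w[i]; }
};

// truth must be sorted ascending, mirroring the multilabel_index walk above.
std::vector<uint32_t> oaa_learn_and_predict(
    toy_binary_learner& base, uint32_t k, const std::vector<uint32_t>& truth)
{
  std::vector<uint32_t> preds;
  uint32_t truth_index = 0;
  for (uint32_t i = 0; i < k; i++)
  {
    float label = -1.f;  // absent from the label set -> negative example
    if (truth_index < truth.size() && truth[truth_index] == i)
    {
      label = 1.f;  // present -> positive example
      truth_index++;
    }
    base.learn(i, label);
    if (base.predict(i) > 0.f) preds.push_back(i);  // positive score -> predicted label
  }
  return preds;
}

int main()
{
  toy_binary_learner base{std::vector<float>(5, 0.f)};
  const auto preds = oaa_learn_and_predict(base, 5, {1, 3});
  return preds.size() <= 5 ? 0 : 1;
}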
preds.clear(); if (learn) - preds.push_back((float)ec.pred.multiclass); + preds.push_back((float)ec.pred.multiclass()); for (uint64_t index : c.policies) preds.push_back((float)c.evals[index].cost / (float)c.total); - ec.pred.scalars = preds; + ec.pred.reset(); + ec.pred.init_as_scalars(std::move(preds)); } void print_scalars(int f, v_array& scalars, v_array& tag) @@ -174,17 +172,19 @@ void finish_example(vw& all, mwt& c, example& ec) float loss = 0.; if (c.learn) if (c.observation != nullptr) - loss = get_cost_estimate(c.observation, (uint32_t)ec.pred.scalars[0]); + loss = get_cost_estimate(c.observation, (uint32_t)ec.pred.scalars()[0]); all.sd->update(ec.test_only, c.observation != nullptr, loss, 1.f, ec.num_features); - for (int sink : all.final_prediction_sink) print_scalars(sink, ec.pred.scalars, ec.tag); + for (int sink : all.final_prediction_sink) print_scalars(sink, ec.pred.scalars(), ec.tag); if (c.learn) { - v_array temp = ec.pred.scalars; - ec.pred.multiclass = (uint32_t)temp[0]; + v_array temp = std::move(ec.pred.scalars()); + ec.pred.reset(); + ec.pred.init_as_multiclass() = (uint32_t)temp[0]; CB::print_update(all, c.observation != nullptr, ec, nullptr, false); - ec.pred.scalars = temp; + ec.pred.reset(); + ec.pred.init_as_scalars(std::move(temp)); } VW::finish_example(all, ec); } @@ -250,9 +250,7 @@ base_learner* mwt_setup(options_i& options, vw& all) calloc_reserve(c->evals, all.length()); c->evals.end() = c->evals.begin() + all.length(); - all.delete_prediction = delete_scalars; all.p->lp = CB::cb_label; - all.label_type = label_type_t::cb; if (c->num_classes > 0) { @@ -280,5 +278,6 @@ base_learner* mwt_setup(options_i& options, vw& all) l->set_save_load(save_load); l->set_finish_example(finish_example); + l->label_type = label_type_t::cb; return make_base(*l); } diff --git a/vowpalwabbit/mwt.h b/vowpalwabbit/mwt.h index 39738d56aef..189c0a3768d 100644 --- a/vowpalwabbit/mwt.h +++ b/vowpalwabbit/mwt.h @@ -8,6 +8,5 @@ LEARNER::base_learner* mwt_setup(VW::config::options_i& options, vw& all); namespace MWT { -void delete_scalars(void* v); void print_scalars(int f, v_array& scalars, v_array& tag); } // namespace MWT diff --git a/vowpalwabbit/nn.cc b/vowpalwabbit/nn.cc index ede044b1847..ab904b2ab68 100644 --- a/vowpalwabbit/nn.cc +++ b/vowpalwabbit/nn.cc @@ -38,8 +38,8 @@ struct nn float* hidden_units; bool* dropped_out; - polyprediction* hidden_units_pred; - polyprediction* hiddenbias_pred; + std::vector hidden_units_pred; + std::vector hiddenbias_pred; vw* all; // many things std::shared_ptr _random_state; @@ -49,11 +49,6 @@ struct nn delete squared_loss; free(hidden_units); free(dropped_out); - free(hidden_units_pred); - free(hiddenbias_pred); - VW::dealloc_example(nullptr, output_layer); - VW::dealloc_example(nullptr, hiddenbias); - VW::dealloc_example(nullptr, outputweight); } }; @@ -83,8 +78,7 @@ static inline float fastpow2(float p) float clipp = (p < -126) ? 
-126.0f : p; int w = (int)clipp; float z = clipp - w + offset; - union - { + union { uint32_t i; float f; } v = {cast_uint32_t((1 << 23) * (clipp + 121.2740575f + 27.7280233f / (4.84252568f - z) - 1.49012907f * z))}; @@ -100,9 +94,11 @@ void finish_setup(nn& n, vw& all) { // TODO: output_layer audit - memset(&n.output_layer, 0, sizeof(n.output_layer)); + // TODO: This memset is very dangerous especially now that example has destructor etc + memset(&n.output_layer, 0, sizeof(n.output_layer)); n.output_layer.interactions = &all.interactions; n.output_layer.indices.push_back(nn_output_namespace); + n.output_layer.pred.init_as_scalar(); uint64_t nn_index = nn_constant << all.weights.stride_shift(); features& fs = n.output_layer.feature_space[nn_output_namespace]; @@ -136,7 +132,8 @@ void finish_setup(nn& n, vw& all) n.hiddenbias.feature_space[constant_namespace].space_names.push_back( audit_strings_ptr(new audit_strings("", "HiddenBias"))); n.hiddenbias.total_sum_feat_sq++; - n.hiddenbias.l.simple.label = FLT_MAX; + n.hiddenbias.l.init_as_simple().label = FLT_MAX; + n.hiddenbias.pred.init_as_scalar(); n.hiddenbias.weight = 1; memset(&n.outputweight, 0, sizeof(n.outputweight)); n.outputweight.interactions = &all.interactions; @@ -148,7 +145,8 @@ void finish_setup(nn& n, vw& all) audit_strings_ptr(new audit_strings("", "OutputWeight"))); n.outputweight.feature_space[nn_output_namespace].values[0] = 1; n.outputweight.total_sum_feat_sq++; - n.outputweight.l.simple.label = FLT_MAX; + n.outputweight.l.init_as_simple().label = FLT_MAX; + n.outputweight.pred.init_as_scalar(); n.outputweight.weight = 1; n.finished_setup = true; @@ -163,7 +161,7 @@ void end_pass(nn& n) template void predict_or_learn_multi(nn& n, single_learner& base, example& ec) { - bool shouldOutput = n.all->raw_prediction > 0; + const bool shouldOutput = n.all->raw_prediction > 0; if (!n.finished_setup) finish_setup(n, *(n.all)); shared_data sd; @@ -171,15 +169,15 @@ void predict_or_learn_multi(nn& n, single_learner& base, example& ec) { sd_guard(n.all, &sd); - label_data ld = ec.l.simple; + label_data ld = ec.l.simple(); void (*save_set_minmax)(shared_data*, float) = n.all->set_minmax; float save_min_label; float save_max_label; float dropscale = n.dropout ?
2.0f : 1.0f; loss_function* save_loss = n.all->loss; - polyprediction* hidden_units = n.hidden_units_pred; - polyprediction* hiddenbias_pred = n.hiddenbias_pred; + polyprediction* hidden_units = n.hidden_units_pred.data(); + polyprediction* hiddenbias_pred = n.hiddenbias_pred.data(); bool* dropped_out = n.dropped_out; std::ostringstream outputStringStream; @@ -204,11 +202,11 @@ void predict_or_learn_multi(nn& n, single_learner& base, example& ec) for (unsigned int i = 0; i < n.k; ++i) // avoid saddle point at 0 - if (hiddenbias_pred[i].scalar == 0) + if (hiddenbias_pred[i].scalar() == 0) { - n.hiddenbias.l.simple.label = (float)(n._random_state->get_and_update_random() - 0.5); + n.hiddenbias.l.simple().label = (float)(n._random_state->get_and_update_random() - 0.5); base.learn(n.hiddenbias, i); - n.hiddenbias.l.simple.label = FLT_MAX; + n.hiddenbias.l.simple().label = FLT_MAX; } base.multipredict(ec, 0, n.k, hidden_units, true); @@ -218,8 +216,8 @@ void predict_or_learn_multi(nn& n, single_learner& base, example& ec) if (ec.passthrough) for (unsigned int i = 0; i < n.k; ++i) { - add_passthrough_feature(ec, i * 2, hiddenbias_pred[i].scalar); - add_passthrough_feature(ec, i * 2 + 1, hidden_units[i].scalar); + add_passthrough_feature(ec, i * 2, hiddenbias_pred[i].scalar()); + add_passthrough_feature(ec, i * 2 + 1, hidden_units[i].scalar()); } } @@ -228,8 +226,8 @@ void predict_or_learn_multi(nn& n, single_learner& base, example& ec) { if (i > 0) outputStringStream << ' '; - outputStringStream << i << ':' << hidden_units[i].scalar << ',' - << fasttanh(hidden_units[i].scalar); // TODO: huh, what was going on here? + outputStringStream << i << ':' << hidden_units[i].scalar() << ',' + << fasttanh(hidden_units[i].scalar()); // TODO: huh, what was going on here? } n.all->loss = save_loss; @@ -243,7 +241,7 @@ void predict_or_learn_multi(nn& n, single_learner& base, example& ec) float save_final_prediction = 0; float save_ec_loss = 0; -CONVERSE: // That's right, I'm using goto. So sue me. + CONVERSE: // That's right, I'm using goto. So sue me. n.output_layer.total_sum_feat_sq = 1; n.output_layer.feature_space[nn_output_namespace].sum_feat_sq = 1; @@ -259,7 +257,7 @@ void predict_or_learn_multi(nn& n, single_learner& base, example& ec) for (unsigned int i = 0; i < n.k; ++i) { - float sigmah = (dropped_out[i]) ? 0.0f : dropscale * fasttanh(hidden_units[i].scalar); + float sigmah = (dropped_out[i]) ? 
0.0f : dropscale * fasttanh(hidden_units[i].scalar()); features& out_fs = n.output_layer.feature_space[nn_output_namespace]; out_fs.values[i] = sigmah; @@ -268,15 +266,15 @@ void predict_or_learn_multi(nn& n, single_learner& base, example& ec) n.outputweight.feature_space[nn_output_namespace].indicies[0] = out_fs.indicies[i]; base.predict(n.outputweight, n.k); - float wf = n.outputweight.pred.scalar; + float wf = n.outputweight.pred.scalar(); // avoid saddle point at 0 if (wf == 0) { float sqrtk = std::sqrt((float)n.k); - n.outputweight.l.simple.label = (float)(n._random_state->get_and_update_random() - 0.5) / sqrtk; + n.outputweight.l.simple().label = (float)(n._random_state->get_and_update_random() - 0.5) / sqrtk; base.update(n.outputweight, n.k); - n.outputweight.l.simple.label = FLT_MAX; + n.outputweight.l.simple().label = FLT_MAX; } } @@ -300,10 +298,10 @@ void predict_or_learn_multi(nn& n, single_learner& base, example& ec) * ec.feature_space[] is reverted to its original value * save_nn_output_namespace contains the COPIED value * save_nn_output_namespace is destroyed - */ + */ features save_nn_output_namespace = std::move(ec.feature_space[nn_output_namespace]); auto tmp_sum_feat_sq = n.output_layer.feature_space[nn_output_namespace].sum_feat_sq; - ec.feature_space[nn_output_namespace].deep_copy_from(n.output_layer.feature_space[nn_output_namespace]); + ec.feature_space[nn_output_namespace] = n.output_layer.feature_space[nn_output_namespace]; ec.total_sum_feat_sq += tmp_sum_feat_sq; if (is_learn) @@ -364,12 +362,12 @@ void predict_or_learn_multi(nn& n, single_learner& base, example& ec) n.outputweight.feature_space[nn_output_namespace].indicies[0] = n.output_layer.feature_space[nn_output_namespace].indicies[i]; base.predict(n.outputweight, n.k); - float nu = n.outputweight.pred.scalar; + float nu = n.outputweight.pred.scalar(); float gradhw = 0.5f * nu * gradient * sigmahprime; - ec.l.simple.label = GD::finalize_prediction(n.all->sd, hidden_units[i].scalar - gradhw); - ec.pred.scalar = hidden_units[i].scalar; - if (ec.l.simple.label != hidden_units[i].scalar) + ec.l.simple().label = GD::finalize_prediction(n.all->sd, hidden_units[i].scalar() - gradhw); + ec.pred.scalar() = hidden_units[i].scalar(); + if (ec.l.simple().label != hidden_units[i].scalar()) base.update(ec, i); } } @@ -382,7 +380,7 @@ void predict_or_learn_multi(nn& n, single_learner& base, example& ec) } } - ec.l.simple.label = ld.label; + ec.l.simple().label = ld.label; if (!converse) { @@ -403,7 +401,7 @@ void predict_or_learn_multi(nn& n, single_learner& base, example& ec) } ec.partial_prediction = save_partial_prediction; - ec.pred.scalar = save_final_prediction; + ec.pred.scalar() = save_final_prediction; ec.loss = save_ec_loss; } n.all->set_minmax(n.all->sd, sd.min_label); @@ -422,7 +420,7 @@ void multipredict(nn& n, single_learner& base, example& ec, size_t count, size_t if (finalize_predictions) pred[c] = ec.pred; else - pred[c].scalar = ec.partial_prediction; + pred[c].scalar() = ec.partial_prediction; ec.ft_offset += (uint64_t)step; } ec.ft_offset -= (uint64_t)(step * count); @@ -432,7 +430,7 @@ void finish_example(vw& all, nn&, example& ec) { int save_raw_prediction = all.raw_prediction; all.raw_prediction = -1; - return_simple_example(all, nullptr, ec); + return_simple_example_explicit(all, ec); all.raw_prediction = save_raw_prediction; } @@ -481,8 +479,17 @@ base_learner* nn_setup(options_i& options, vw& all) n->hidden_units = calloc_or_throw(n->k); n->dropped_out = calloc_or_throw(n->k); - 
n->hidden_units_pred = calloc_or_throw(n->k); - n->hiddenbias_pred = calloc_or_throw(n->k); + n->hidden_units_pred.resize(n->k); + for (auto& pred : n->hidden_units_pred) + { + pred.init_as_scalar(); + } + n->hiddenbias_pred.resize(n->k); + for (auto& pred : n->hiddenbias_pred) + { + pred.init_as_scalar(); + } + n->output_layer.pred.init_as_scalar(); auto base = as_singleline(setup_base(options, all)); n->increment = base->increment; // Indexing of output layer is odd. @@ -493,6 +500,7 @@ base_learner* nn_setup(options_i& options, vw& all) l.set_multipredict(multipredict); l.set_finish_example(finish_example); l.set_end_pass(end_pass); + l.label_type = label_type_t::simple; return make_base(l); } diff --git a/vowpalwabbit/no_label.cc b/vowpalwabbit/no_label.cc index 539a6feb756..6f7f24f89eb 100644 --- a/vowpalwabbit/no_label.cc +++ b/vowpalwabbit/no_label.cc @@ -6,6 +6,7 @@ #include #include #include +#include "vw_string_view.h" #include "cache.h" #include "accumulate.h" @@ -14,23 +15,33 @@ namespace no_label { -char* bufread_no_label(shared_data*, label_data*, char* c) { return c; } - -size_t read_cached_no_label(shared_data*, void*, io_buf&) { return 1; } - -float get_weight(void*) { return 1.; } - -char* bufcache_no_label(label_data*, char* c) { return c; } +size_t read_cached_no_label(shared_data*, polylabel& label, io_buf&) +{ + if (label.get_type() != label_type_t::empty) + { + label.reset(); + label.init_as_empty(); + } + return 1; +} -void cache_no_label(void*, io_buf&) {} +float get_weight(polylabel&) { return 1.; } -void default_no_label(void*) {} +void cache_no_label(polylabel&, io_buf&) {} -bool test_label(void*) { return false; } +// This is wasted work, ideally empty and unset should be the same thing. +void default_no_label(polylabel& label) +{ + if (label.get_type() != label_type_t::empty) + { + label.reset(); + label.init_as_empty(); + } +} -void delete_no_label(void*) {} +bool test_label(polylabel&) { return false; } -void parse_no_label(parser*, shared_data*, void*, v_array& words) +void parse_no_label(parser*, shared_data*, polylabel&, v_array& words) { switch (words.size()) { @@ -43,15 +54,15 @@ void parse_no_label(parser*, shared_data*, void*, v_array& word } } -label_parser no_label_parser = {default_no_label, parse_no_label, cache_no_label, read_cached_no_label, delete_no_label, - get_weight, nullptr, test_label, sizeof(nullptr)}; +label_parser no_label_parser = {default_no_label, parse_no_label, cache_no_label, read_cached_no_label, polylabel_delete_label, + get_weight, polylabel_copy_label, test_label, sizeof(nullptr)}; void print_no_label_update(vw& all, example& ec) { if (all.sd->weighted_labeled_examples + all.sd->weighted_unlabeled_examples >= all.sd->dump_interval && !all.quiet && !all.bfgs) { - all.sd->print_update(all.holdout_set_off, all.current_pass, 0.f, ec.pred.scalar, ec.num_features, all.progress_add, + all.sd->print_update(all.holdout_set_off, all.current_pass, 0.f, ec.pred.scalar(), ec.num_features, all.progress_add, all.progress_arg); } } @@ -64,13 +75,13 @@ void output_and_account_no_label_example(vw& all, example& ec) for (size_t i = 0; i < all.final_prediction_sink.size(); i++) { int f = (int)all.final_prediction_sink[i]; - all.print_by_ref(f, ec.pred.scalar, 0, ec.tag); + all.print_by_ref(f, ec.pred.scalar(), 0, ec.tag); } print_no_label_update(all, ec); } -void return_no_label_example(vw& all, void*, example& ec) +void return_no_label_example(vw& all, polylabel&, example& ec) {
output_and_account_example(all, ec); VW::finish_example(all, ec); diff --git a/vowpalwabbit/oaa.cc b/vowpalwabbit/oaa.cc index 65e23bb4dda..f47b1f589e9 100644 --- a/vowpalwabbit/oaa.cc +++ b/vowpalwabbit/oaa.cc @@ -9,38 +9,40 @@ #include "rand48.h" #include "vw_exception.h" #include "vw.h" +#include using namespace VW::config; struct oaa { uint64_t k; - vw* all; // for raw - polyprediction* pred; // for multipredict - uint64_t num_subsample; // for randomized subsampling, how many negatives to draw? - uint32_t* subsample_order; // for randomized subsampling, in what order should we touch classes - size_t subsample_id; // for randomized subsampling, where do we live in the list - - ~oaa() - { - free(pred); - free(subsample_order); - } + vw* all; // for raw + std::vector pred; // for multipredict + uint64_t num_subsample; // for randomized subsampling, how many negatives to draw? + std::vector subsample_order; // for randomized subsampling, in what order should we touch classes + size_t subsample_id; // for randomized subsampling, where do we live in the list }; void learn_randomized(oaa& o, LEARNER::single_learner& base, example& ec) { - MULTICLASS::label_t ld = ec.l.multi; + MULTICLASS::label_t ld = ec.l.multi(); if (ld.label == 0 || (ld.label > o.k && ld.label != (uint32_t)-1)) + { std::cout << "label " << ld.label << " is not in {1," << o.k << "} This won't work right." << std::endl; + } + + // Prepare for next reduction. + ec.pred.reset(); + ec.pred.init_as_scalar(); + ec.l.reset(); + ec.l.init_as_simple(1.f, 0.f, 0.f); // truth - ec.l.simple = {1., 0.f, 0.f}; // truth base.learn(ec, ld.label - 1); size_t prediction = ld.label; float best_partial_prediction = ec.partial_prediction; - ec.l.simple.label = -1.; + ec.l.simple().label = -1.; float weight_temp = ec.weight; ec.weight *= ((float)o.k) / (float)o.num_subsample; size_t p = o.subsample_id; @@ -61,72 +63,96 @@ void learn_randomized(oaa& o, LEARNER::single_learner& base, example& ec) } o.subsample_id = p; - ec.pred.multiclass = (uint32_t)prediction; - ec.l.multi = ld; + // Ensure the example is in the correct state upon exiting. + ec.pred.reset(); + ec.pred.init_as_multiclass(static_cast(prediction)); + ec.l.reset(); + ec.l.init_as_multi(ld); ec.weight = weight_temp; } +// Prediction type is scalars when scores is true and multiclass when scores is false. template void predict_or_learn(oaa& o, LEARNER::single_learner& base, example& ec) { - MULTICLASS::label_t mc_label_data = ec.l.multi; + MULTICLASS::label_t mc_label_data = ec.l.multi(); if (mc_label_data.label == 0 || (mc_label_data.label > o.k && mc_label_data.label != (uint32_t)-1)) + { std::cout << "label " << mc_label_data.label << " is not in {1," << o.k << "} This won't work right."
<< std::endl; + } - std::stringstream outputStringStream; - uint32_t prediction = 1; - v_array scores_array; - if (scores) - scores_array = ec.pred.scalars; + ec.l.reset(); + ec.l.init_as_simple(FLT_MAX, 0.f, 0.f); + ec.pred.reset(); + ec.pred.init_as_scalar(); + base.multipredict(ec, 0, o.k, o.pred.data(), true); - ec.l.simple = {FLT_MAX, 0.f, 0.f}; - base.multipredict(ec, 0, o.k, o.pred, true); + uint32_t prediction = 1; for (uint32_t i = 2; i <= o.k; i++) - if (o.pred[i - 1].scalar > o.pred[prediction - 1].scalar) + { + if (o.pred[i - 1].scalar() > o.pred[prediction - 1].scalar()) + { prediction = i; + } + } if (ec.passthrough) - for (uint32_t i = 1; i <= o.k; i++) add_passthrough_feature(ec, i, o.pred[i - 1].scalar); + { + for (uint32_t i = 1; i <= o.k; i++) + { + add_passthrough_feature(ec, i, o.pred[i - 1].scalar()); + } + } if (is_learn) { for (uint32_t i = 1; i <= o.k; i++) { - ec.l.simple = {(mc_label_data.label == i) ? 1.f : -1.f, 0.f, 0.f}; - ec.pred.scalar = o.pred[i - 1].scalar; + ec.l.reset(); + ec.l.init_as_simple((mc_label_data.label == i) ? 1.f : -1.f, 0.f, 0.f); + ec.pred.reset(); + ec.pred.init_as_scalar(o.pred[i - 1].scalar()); base.update(ec, i - 1); } } if (print_all) { - outputStringStream << "1:" << o.pred[0].scalar; - for (uint32_t i = 2; i <= o.k; i++) outputStringStream << ' ' << i << ':' << o.pred[i - 1].scalar; + std::stringstream outputStringStream; + outputStringStream << "1:" << o.pred[0].scalar(); + for (uint32_t i = 2; i <= o.k; i++) + outputStringStream << ' ' << i << ':' << o.pred[i - 1].scalar(); o.all->print_text_by_ref(o.all->raw_prediction, outputStringStream.str(), ec.tag); } if (scores) { - scores_array.clear(); - for (uint32_t i = 0; i < o.k; i++) scores_array.push_back(o.pred[i].scalar); - ec.pred.scalars = scores_array; + v_array scores_array; + for (uint32_t i = 0; i < o.k; i++) scores_array.push_back(o.pred[i].scalar()); + + ec.pred.reset(); + ec.pred.init_as_scalars(std::move(scores_array)); if (probabilities) { - float sum_prob = 0; + float sum_prob = 0.f; for (uint32_t i = 0; i < o.k; i++) { - ec.pred.scalars[i] = 1.f / (1.f + correctedExp(-o.pred[i].scalar)); - sum_prob += ec.pred.scalars[i]; + ec.pred.scalars()[i] = 1.f / (1.f + correctedExp(-o.pred[i].scalar())); + sum_prob += ec.pred.scalars()[i]; } - float inv_sum_prob = 1.f / sum_prob; - for (uint32_t i = 0; i < o.k; i++) ec.pred.scalars[i] *= inv_sum_prob; + const float inv_sum_prob = 1.f / sum_prob; + for (uint32_t i = 0; i < o.k; i++) ec.pred.scalars()[i] *= inv_sum_prob; } } else - ec.pred.multiclass = prediction; + { + ec.pred.reset(); + ec.pred.init_as_multiclass(prediction); + } - ec.l.multi = mc_label_data; + ec.l.reset(); + ec.l.init_as_multi(mc_label_data); } // TODO: partial code duplication with multiclass.cc:finish_example @@ -144,8 +170,8 @@ void finish_example_scores(vw& all, oaa& o, example& ec) float correct_class_prob = 0; if (probabilities) { - if (ec.l.multi.label <= o.k) // prevent segmentation fault if labeĺ==(uint32_t)-1 - correct_class_prob = ec.pred.scalars[ec.l.multi.label - 1]; + if (ec.l.multi().label <= o.k) // prevent segmentation fault if labeĺ==(uint32_t)-1 + correct_class_prob = ec.pred.scalars()[ec.l.multi().label - 1]; if (correct_class_prob > 0) multiclass_log_loss = -log(correct_class_prob) * ec.weight; if (ec.test_only) @@ -158,11 +184,11 @@ void finish_example_scores(vw& all, oaa& o, example& ec) // but we cannot store it in ec.pred union because we store ec.pred.probs there. 
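// Note on the scalars consumed by the argmax below: with --probabilities,
// predict_or_learn squashes each per-class raw score through a logistic link
// and renormalizes so the k outputs sum to 1. A minimal standalone sketch of
// that transform (hypothetical helper, not part of this patch; correctedExp is
// approximated here by std::exp):
//
//   #include <cmath>
//   #include <vector>
//
//   std::vector<float> to_probabilities(const std::vector<float>& raw)
//   {
//     std::vector<float> probs(raw.size());
//     float sum = 0.f;
//     for (size_t i = 0; i < raw.size(); i++)
//     {
//       probs[i] = 1.f / (1.f + std::exp(-raw[i]));  // per-class sigmoid
//       sum += probs[i];
//     }
//     for (auto& p : probs) p /= sum;  // renormalize so the outputs sum to 1
//     return probs;
//   }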
uint32_t prediction = 0; for (uint32_t i = 1; i < o.k; i++) - if (ec.pred.scalars[i] > ec.pred.scalars[prediction]) + if (ec.pred.scalars()[i] > ec.pred.scalars()[prediction]) prediction = i; prediction++; // prediction is 1-based index (not 0-based) float zero_one_loss = 0; - if (ec.l.multi.label != prediction) + if (ec.l.multi().label != prediction) zero_one_loss = ec.weight; // === Print probabilities for all classes @@ -177,12 +203,12 @@ void finish_example_scores(vw& all, oaa& o, example& ec) } else outputStringStream << i + 1; - outputStringStream << ':' << ec.pred.scalars[i]; + outputStringStream << ':' << ec.pred.scalars()[i]; } for (int sink : all.final_prediction_sink) all.print_text_by_ref(sink, outputStringStream.str(), ec.tag); // === Report updates using zero-one loss - all.sd->update(ec.test_only, ec.l.multi.label != (uint32_t)-1, zero_one_loss, ec.weight, ec.num_features); + all.sd->update(ec.test_only, ec.l.multi().label != (uint32_t)-1, zero_one_loss, ec.weight, ec.num_features); // Alternatively, we could report multiclass_log_loss. // all.sd->update(ec.test_only, multiclass_log_loss, ec.weight, ec.num_features); // Even better would be to report both losses, but this would mean to increase @@ -217,8 +243,12 @@ LEARNER::base_learner* oaa_setup(options_i& options, vw& all) THROW("error: you have " << all.sd->ldict->getK() << " named labels; use that as the argument to oaa") data->all = &all; - data->pred = calloc_or_throw(data->k); - data->subsample_order = nullptr; + data->pred.resize(data->k); + for (auto& pred : data->pred) + { + pred.init_as_scalar(); + } + data->subsample_id = 0; if (data->num_subsample > 0) { @@ -229,14 +259,15 @@ LEARNER::base_learner* oaa_setup(options_i& options, vw& all) } else { - data->subsample_order = calloc_or_throw(data->k); - for (size_t i = 0; i < data->k; i++) data->subsample_order[i] = (uint32_t)i; + // Fills the vector with the ascending values 0,1,2,...,K-1.
+ data->subsample_order.resize(data->k); + std::iota(std::begin(data->subsample_order), std::end(data->subsample_order), 0); + for (size_t i = 0; i < data->k; i++) { - size_t j = (size_t)(all.get_random_state()->get_and_update_random() * (float)(data->k - i)) + i; - uint32_t tmp = data->subsample_order[i]; - data->subsample_order[i] = data->subsample_order[j]; - data->subsample_order[j] = tmp; + const auto j = + static_cast(all.get_random_state()->get_and_update_random() * static_cast(data->k - i)) + i; + std::swap(data->subsample_order[i], data->subsample_order[j]); } } } @@ -246,37 +277,45 @@ LEARNER::base_learner* oaa_setup(options_i& options, vw& all) auto base = as_singleline(setup_base(options, all)); if (probabilities || scores) { - all.delete_prediction = delete_scalars; if (probabilities) { - auto loss_function_type = all.loss->getType(); + const auto loss_function_type = all.loss->getType(); if (loss_function_type != "logistic") + { all.trace_message << "WARNING: --probabilities should be used only with --loss_function=logistic" << std::endl; - // the three boolean template parameters are: is_learn, print_all and scores - l = &LEARNER::init_multiclass_learner(data, base, predict_or_learn, - predict_or_learn, all.p, data->k, prediction_type_t::scalars); + } + l = &LEARNER::init_multiclass_learner(data, base, + predict_or_learn, + predict_or_learn, all.p, + data->k, prediction_type_t::scalars); all.sd->report_multiclass_log_loss = true; - l->set_finish_example(finish_example_scores); + l->set_finish_example(finish_example_scores); } else { - l = &LEARNER::init_multiclass_learner(data, base, predict_or_learn, - predict_or_learn, all.p, data->k, prediction_type_t::scalars); - l->set_finish_example(finish_example_scores); + l = &LEARNER::init_multiclass_learner(data, base, + predict_or_learn, + predict_or_learn, all.p, + data->k, prediction_type_t::scalars); + l->set_finish_example(finish_example_scores); } } else if (all.raw_prediction > 0) - l = &LEARNER::init_multiclass_learner(data, base, predict_or_learn, - predict_or_learn, all.p, data->k, prediction_type_t::multiclass); + l = &LEARNER::init_multiclass_learner(data, base, + predict_or_learn, + predict_or_learn, all.p, + data->k, prediction_type_t::multiclass); else - l = &LEARNER::init_multiclass_learner(data, base, predict_or_learn, - predict_or_learn, all.p, data->k, prediction_type_t::multiclass); + l = &LEARNER::init_multiclass_learner(data, base, + predict_or_learn, + predict_or_learn, all.p, + data->k, prediction_type_t::multiclass); if (data_ptr->num_subsample > 0) { l->set_learn(learn_randomized); l->set_finish_example(MULTICLASS::finish_example_without_loss); } - + l->label_type = label_type_t::multi; return make_base(*l); } diff --git a/vowpalwabbit/parse_args.cc b/vowpalwabbit/parse_args.cc index f2b79d1a242..817d29aee60 100644 --- a/vowpalwabbit/parse_args.cc +++ b/vowpalwabbit/parse_args.cc @@ -208,7 +208,7 @@ void parse_dictionary_argument(vw& all, std::string str) // mimicking old v_hashmap behavior for load factor.
// A smaller factor will generally use more memory but have faster access map->max_load_factor(0.25); - example* ec = VW::alloc_examples(all.p->lp.label_size, 1); + example* ec = VW::alloc_examples(1); size_t def = (size_t)' '; @@ -230,7 +230,7 @@ void parse_dictionary_argument(vw& all, std::string str) if (new_buffer == nullptr) { free(buffer); - VW::dealloc_example(all.p->lp.delete_label, *ec); + ec->~example(); free(ec); io->close_file(); delete io; @@ -264,8 +264,7 @@ void parse_dictionary_argument(vw& all, std::string str) { continue; } - std::unique_ptr arr(new features); - arr->deep_copy_from(ec->feature_space[def]); + std::unique_ptr arr(new features(ec->feature_space[def])); map->emplace(word, std::move(arr)); // clear up ec @@ -279,7 +278,7 @@ void parse_dictionary_argument(vw& all, std::string str) free(buffer); io->close_file(); delete io; - VW::dealloc_example(all.p->lp.delete_label, *ec); + ec->~example(); free(ec); if (!all.quiet) @@ -1227,7 +1226,11 @@ LEARNER::base_learner* setup_base(options_i& options, vw& all) if (base == nullptr) return setup_base(options, all); else + { + assert(base->label_type != label_type_t::unset); + assert(base->pred_type != prediction_type_t::unset); return base; + } } void parse_reductions(options_i& options, vw& all) @@ -1658,7 +1661,7 @@ vw* initialize( options_i& options, io_buf* model, bool skipModelLoad, trace_message_t trace_listener, void* trace_context) { vw& all = parse_args(options, trace_listener, trace_context); - + try { // if user doesn't pass in a model, read from options @@ -1896,15 +1899,7 @@ void finish(vw& all, bool delete_all) if (all.should_delete_options) delete all.options; - // TODO: migrate all finalization into parser destructor - if (all.p != nullptr) - { - free_parser(all); - finalize_source(all.p); - all.p->parse_name.clear(); - all.p->parse_name.delete_v(); - delete all.p; - } + delete all.p; bool seeded; if (all.weights.seeded() > 0) @@ -1923,7 +1918,6 @@ void finish(vw& all, bool delete_all) for (size_t i = 0; i < all.final_prediction_sink.size(); i++) if (all.final_prediction_sink[i] != 1) io_buf::close_file_or_socket(all.final_prediction_sink[i]); - all.final_prediction_sink.delete_v(); all.loaded_dictionaries.clear(); // TODO: should we be clearing the namespace dictionaries? diff --git a/vowpalwabbit/parse_dispatch_loop.h b/vowpalwabbit/parse_dispatch_loop.h index 7177d9f1966..1bd244bf124 100644 --- a/vowpalwabbit/parse_dispatch_loop.h +++ b/vowpalwabbit/parse_dispatch_loop.h @@ -10,7 +10,7 @@ using dispatch_fptr = std::function&)>; inline void parse_dispatch(vw& all, dispatch_fptr dispatch) { - v_array examples = v_init(); + v_array examples; size_t example_number = 0; // for variable-size batch learning algorithms try @@ -32,7 +32,8 @@ inline void parse_dispatch(vw& all, dispatch_fptr dispatch) all.passes_complete++; // setup an end_pass example - all.p->lp.default_label(&examples[0]->l); + examples[0]->l.reset(); + all.p->lp.default_label(examples[0]->l); examples[0]->end_pass = true; all.p->in_pass_counter = 0; @@ -66,5 +67,4 @@ inline void parse_dispatch(vw& all, dispatch_fptr dispatch) all.p->exc_ptr = std::current_exception(); } lock_done(*all.p); - examples.delete_v(); } diff --git a/vowpalwabbit/parse_example.cc b/vowpalwabbit/parse_example.cc index b26842e9630..6c9ed535e99 100644 --- a/vowpalwabbit/parse_example.cc +++ b/vowpalwabbit/parse_example.cc @@ -3,7 +3,7 @@ // license as described in the file LICENSE. 
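// Note: the dictionary code above replaces features::deep_copy_from with plain
// copy construction. A minimal sketch of the new idiom, assuming features now
// has full value semantics:
//
//   features src;
//   src.push_back(1.f, 42);  // (value, index)
//   features dst(src);       // deep copy via copy constructor
//   features dst2;
//   dst2 = src;              // or via copy assignment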
#include -#include +#include "vw_string_view.h" #include #include "parse_example.h" #include "hash.h" @@ -187,7 +187,7 @@ class TC_parser affix_fs.push_back(_v, word_hash); if (audit) { - v_array affix_v = v_init(); + v_array affix_v; if (_index != ' ') affix_v.push_back(_index); affix_v.push_back(is_prefix ? '+' : '-'); @@ -229,7 +229,7 @@ class TC_parser spell_fs.push_back(_v, word_hash); if (audit) { - v_array spelling_v = v_init(); + v_array spelling_v; if (_index != ' ') { spelling_v.push_back(_index); @@ -260,7 +260,7 @@ class TC_parser if (audit) for (const auto& id : feats->indicies) { - std::stringstream ss; + std::stringstream ss; ss << _index << '_'; ss << feature_name; ss << '=' << id; @@ -402,7 +402,6 @@ class TC_parser TC_parser(VW::string_view line, vw& all, example* ae) : _line(line) { - _spelling = v_init(); if (!_line.empty()) { this->_read_idx = 0; @@ -423,7 +422,7 @@ class TC_parser void substring_to_example(vw* all, example* ae, VW::string_view example) { - all->p->lp.default_label(&ae->l); + all->p->lp.default_label(ae->l); size_t bar_idx = example.find('|'); @@ -457,7 +456,7 @@ void substring_to_example(vw* all, example* ae, VW::string_view example) } if (!all->p->words.empty()) - all->p->lp.parse_label(all->p, all->p->_shared_data, &ae->l, all->p->words); + all->p->lp.parse_label(all->p, all->p->_shared_data, ae->l, all->p->words); if (bar_idx != VW::string_view::npos) { diff --git a/vowpalwabbit/parse_example.h b/vowpalwabbit/parse_example.h index 691c02f953c..461612e221a 100644 --- a/vowpalwabbit/parse_example.h +++ b/vowpalwabbit/parse_example.h @@ -3,6 +3,7 @@ // license as described in the file LICENSE. #pragma once #include +#include "vw_string_view.h" #include "parse_primitives.h" #include "example.h" #include "vw.h" diff --git a/vowpalwabbit/parse_example_json.h b/vowpalwabbit/parse_example_json.h index 077d35cda0a..e03b53ed3c7 100644 --- a/vowpalwabbit/parse_example_json.h +++ b/vowpalwabbit/parse_example_json.h @@ -175,7 +175,7 @@ class LabelObjectState : public BaseState BaseState* StartObject(Context& ctx) override { - ctx.all->p->lp.default_label(&ctx.ex->l); + ctx.all->p->lp.default_label(ctx.ex->l); // don't allow { { { } } } if (ctx.previous_state == this) @@ -202,17 +202,17 @@ class LabelObjectState : public BaseState // simple if (!_stricmp(ctx.key, "Label")) { - ctx.ex->l.simple.label = v; + ctx.ex->l.simple().label = v; found = true; } else if (!_stricmp(ctx.key, "Initial")) { - ctx.ex->l.simple.initial = v; + ctx.ex->l.simple().initial = v; found = true; } else if (!_stricmp(ctx.key, "Weight")) { - ctx.ex->l.simple.weight = v; + ctx.ex->l.simple().weight = v; found = true; } // CB @@ -244,13 +244,13 @@ class LabelObjectState : public BaseState BaseState* EndObject(Context& ctx, rapidjson::SizeType) override { - if (ctx.all->label_type == label_type_t::ccb) + if (ctx.all->get_label_type() == label_type_t::conditional_contextual_bandit) { - auto ld = (CCB::label*)&ctx.ex->l; + auto& ld = ctx.ex->l.ccb(); for (auto id : inc) { - ld->explicit_included_actions.push_back(id); + ld.explicit_included_actions.push_back(id); } inc.clear(); @@ -270,21 +270,21 @@ class LabelObjectState : public BaseState actions.clear(); probs.clear(); - ld->outcome = outcome; + ld.outcome = outcome; cb_label = {0., 0, 0., 0.}; } } else if (found_cb) { - CB::label* ld = (CB::label*)&ctx.ex->l; - ld->costs.push_back(cb_label); + auto& ld = ctx.ex->l.cb(); + ld.costs.push_back(cb_label); found_cb = false; cb_label = {0., 0, 0., 0.}; } else if (found) { - 
count_label(ctx.all->sd, ctx.ex->l.simple.label); + count_label(ctx.all->sd, ctx.ex->l.simple().label); found = false; } @@ -357,14 +357,14 @@ struct LabelState : BaseState BaseState* Float(Context& ctx, float v) override { // TODO: once we introduce label types, check here - ctx.ex->l.simple.label = v; + ctx.ex->l.simple().label = v; return ctx.previous_state; } BaseState* Uint(Context& ctx, unsigned v) override { // TODO: once we introduce label types, check here - ctx.ex->l.simple.label = (float)v; + ctx.ex->l.simple().label = (float)v; return ctx.previous_state; } }; @@ -432,9 +432,9 @@ struct MultiState : BaseState BaseState* StartArray(Context& ctx) override { // mark shared example - if (ctx.all->label_type == label_type_t::cb) + if (ctx.all->get_label_type() == label_type_t::cb) { - CB::label* ld = &ctx.ex->l.cb; + CB::label* ld = &ctx.ex->l.cb(); CB::cb_class f; f.partial_prediction = 0.; @@ -444,9 +444,9 @@ struct MultiState : BaseState ld->costs.push_back(f); } - else if (ctx.all->label_type == label_type_t::ccb) + else if (ctx.all->get_label_type() == label_type_t::conditional_contextual_bandit) { - CCB::label* ld = &ctx.ex->l.conditional_contextual_bandit; + CCB::label* ld = &ctx.ex->l.ccb(); ld->type = CCB::example_type::shared; } else @@ -459,10 +459,10 @@ struct MultiState : BaseState { // allocate new example ctx.ex = &(*ctx.example_factory)(ctx.example_factory_context); - ctx.all->p->lp.default_label(&ctx.ex->l); - if (ctx.all->label_type == label_type_t::ccb) + ctx.all->p->lp.default_label(ctx.ex->l); + if (ctx.all->get_label_type() == label_type_t::conditional_contextual_bandit) { - ctx.ex->l.conditional_contextual_bandit.type = CCB::example_type::action; + ctx.ex->l.ccb().type = CCB::example_type::action; } ctx.examples->push_back(ctx.ex); @@ -504,8 +504,8 @@ struct SlotsState : BaseState { // allocate new example ctx.ex = &(*ctx.example_factory)(ctx.example_factory_context); - ctx.all->p->lp.default_label(&ctx.ex->l); - ctx.ex->l.conditional_contextual_bandit.type = CCB::example_type::slot; + ctx.all->p->lp.default_label(ctx.ex->l); + ctx.ex->l.ccb().type = CCB::example_type::slot; ctx.examples->push_back(ctx.ex); @@ -825,22 +825,22 @@ class DefaultState : public BaseState // If we are in CCB mode and there have been no slots. Check label cost, prob and action were passed. In that // case this is CB, so generate a single slot with this info. 
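// Note: the branch below backfills a single CCB slot when a Decision Service
// JSON payload carried a plain CB label (cost/probability/action) but declared
// no slot examples. Illustrative event shape (hedged: field names assumed from
// the usual DSJSON schema, context payload abbreviated):
//
//   { "_label_cost": -1.0, "_label_probability": 0.8, "_labelIndex": 0,
//     "a": [2, 1], "c": { ... }, "p": [0.8, 0.2] }
//
// From such an event one slot example is appended whose CCB outcome stores the
// labeled action and its probability.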
- if (ctx.all->label_type == label_type_t::ccb) + if (ctx.all->get_label_type() == label_type_t::conditional_contextual_bandit) { auto num_slots = std::count_if(ctx.examples->begin(), ctx.examples->end(), - [](example* ex) { return ex->l.conditional_contextual_bandit.type == CCB::example_type::slot; }); + [](example* ex) { return ex->l.ccb().type == CCB::example_type::slot; }); if (num_slots == 0 && ctx.label_object_state.found_cb) { ctx.ex = &(*ctx.example_factory)(ctx.example_factory_context); - ctx.all->p->lp.default_label(&ctx.ex->l); - ctx.ex->l.conditional_contextual_bandit.type = CCB::example_type::slot; + ctx.all->p->lp.default_label(ctx.ex->l); + ctx.ex->l.ccb().type = CCB::example_type::slot; ctx.examples->push_back(ctx.ex); auto outcome = new CCB::conditional_contextual_bandit_outcome(); outcome->cost = ctx.label_object_state.cb_label.cost; outcome->probabilities.push_back( {ctx.label_object_state.cb_label.action, ctx.label_object_state.cb_label.probability}); - ctx.ex->l.conditional_contextual_bandit.outcome = outcome; + ctx.ex->l.ccb().outcome = outcome; } } } @@ -1022,7 +1022,7 @@ class CCBOutcomeList : public BaseState // Find start index of slot objects by iterating until we find the first slot example. for (auto ex : *ctx.examples) { - if (ex->l.conditional_contextual_bandit.type != CCB::example_type::slot) + if (ex->l.ccb().type != CCB::example_type::slot) { slot_object_index++; } @@ -1058,12 +1058,12 @@ class CCBOutcomeList : public BaseState // DSJson requires the interaction object to be filled. After reading all slot outcomes fill out the top actions. for (auto ex : *ctx.examples) { - if (ex->l.conditional_contextual_bandit.type == CCB::example_type::slot) + if (ex->l.ccb().type == CCB::example_type::slot) { - if (ex->l.conditional_contextual_bandit.outcome) + if (ex->l.ccb().outcome) { - interactions->actions.push_back(ex->l.conditional_contextual_bandit.outcome->probabilities[0].action); - interactions->probabilities.push_back(ex->l.conditional_contextual_bandit.outcome->probabilities[0].score); + interactions->actions.push_back(ex->l.ccb().outcome->probabilities[0].action); + interactions->probabilities.push_back(ex->l.ccb().outcome->probabilities[0].score); } } } @@ -1294,7 +1294,7 @@ struct VWReaderHandler : public rapidjson::BaseReaderHandler, ctx.init(all); ctx.examples = examples; ctx.ex = (*examples)[0]; - all->p->lp.default_label(&ctx.ex->l); + all->p->lp.default_label(ctx.ex->l); ctx.stream = stream; ctx.stream_end = stream_end; @@ -1372,18 +1372,17 @@ void read_line_json( inline void apply_pdrop(vw& all, float pdrop, v_array& examples) { - if (all.label_type == label_type_t::cb) + if (all.get_label_type() == label_type_t::cb) { - for (auto& e : examples) + for (auto& e: examples) { - e->l.cb.weight = 1 - pdrop; + e->l.cb().weight = 1 - pdrop; } - } - else if (all.label_type == label_type_t::ccb) + } else if (all.get_label_type() == label_type_t::conditional_contextual_bandit) { - for (auto& e : examples) + for (auto& e: examples) { - e->l.conditional_contextual_bandit.weight = 1 - pdrop; + e->l.ccb().weight = 1 - pdrop; } } } diff --git a/vowpalwabbit/parse_primitives.h b/vowpalwabbit/parse_primitives.h index cbb1bc4ef2c..c7d07a45f39 100644 --- a/vowpalwabbit/parse_primitives.h +++ b/vowpalwabbit/parse_primitives.h @@ -123,7 +123,7 @@ inline float parseFloat(const char* p, size_t& end_idx, const char* endLine = nu { // can't use stod because that throws an exception. Use strtod instead. 
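// Note: std::strtod reports success through its end pointer: when nothing can
// be parsed it stores the original start pointer, so failure is detectable
// without exceptions. A minimal sketch of that contract:
//
//   #include <cstdlib>
//
//   const char* s = "3.14abc";
//   char* end = nullptr;
//   double d = std::strtod(s, &end);  // d == 3.14, end points at "abc"
//   bool parsed = (end != s);         // false only if nothing was consumed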
char* end = nullptr; - auto ret = strtod(start, &end); + auto ret = std::strtod(start, &end); if (end >= start) { end_idx = end - start; diff --git a/vowpalwabbit/parser.cc b/vowpalwabbit/parser.cc index 830a35e681e..d19396d597a 100644 --- a/vowpalwabbit/parser.cc +++ b/vowpalwabbit/parser.cc @@ -79,7 +79,6 @@ bool is_test_only(uint32_t counter, uint32_t period, uint32_t after, bool holdou void set_compressed(parser* par) { - finalize_source(par); delete par->input; par->input = new comp_io_buf; delete par->output; @@ -207,21 +206,8 @@ IGNORE_DEPRECATED_USAGE_END } } -void finalize_source(parser* p) +void finalize_source(parser*) { -#ifdef _WIN32 - int f = _fileno(stdin); -#else - int f = fileno(stdin); -#endif - while (!p->input->files.empty() && p->input->files.last() == f) p->input->files.pop(); - p->input->close_files(); - - delete p->input; - p->input = nullptr; - p->output->close_files(); - delete p->output; - p->output = nullptr; } void make_write_cache(vw& all, std::string& newname, bool quiet) @@ -304,7 +290,7 @@ void parse_cache(vw& all, std::vector cache_files, bool kill_cache, { if (!quiet) all.trace_message << "using no cache" << endl; - all.p->output->space.delete_v(); + all.p->output->space.clear(); } } @@ -419,7 +405,7 @@ void enable_sources(vw& all, bool quiet, size_t passes, input_options& input_opt // create children size_t num_children = all.num_children; - v_array children = v_init(); + v_array children; children.resize(num_children); for (size_t i = 0; i < num_children; i++) { @@ -657,7 +643,7 @@ void generateGrams(vw& all, example*& ex) void end_pass_example(vw& all, example* ae) { - all.p->lp.default_label(&ae->l); + all.p->lp.default_label(ae->l); ae->end_pass = true; all.p->in_pass_counter = 0; } @@ -685,7 +671,8 @@ example& get_unused_example(vw* all) void setup_examples(vw& all, v_array& examples) { - for (example* ae : examples) setup_example(all, ae); + for (example* ae : examples) + setup_example(all, ae); } void setup_example(vw& all, example* ae) @@ -695,7 +682,7 @@ void setup_example(vw& all, example* ae) if (all.p->write_cache) { - all.p->lp.cache_label(&ae->l, *(all.p->output)); + all.p->lp.cache_label(ae->l, *(all.p->output)); cache_features(*(all.p->output), ae, all.parse_mask); } @@ -712,12 +699,12 @@ void setup_example(vw& all, example* ae) ae->test_only = is_test_only(all.p->in_pass_counter, all.holdout_period, all.holdout_after, all.holdout_set_off, all.p->emptylines_separate_examples ? (all.holdout_period - 1) : 0); // If this example has a test only label then it is true regardless. - ae->test_only |= all.p->lp.test_label(&ae->l); + ae->test_only |= all.p->lp.test_label(ae->l); if (all.p->emptylines_separate_examples && example_is_newline(*ae)) all.p->in_pass_counter++; - ae->weight = all.p->lp.get_weight(&ae->l); + ae->weight = all.p->lp.get_weight(ae->l); if (all.ignore_some) for (unsigned char* i = ae->indices.begin(); i != ae->indices.end(); i++) @@ -760,6 +747,44 @@ void setup_example(vw& all, example* ae) INTERACTIONS::eval_count_of_generated_ft(all, *ae, new_features_cnt, new_features_sum_feat_sq); ae->num_features += new_features_cnt; ae->total_sum_feat_sq += new_features_sum_feat_sq; + + // Prediction type should be preinitialized for the given reductions expected type. 
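// Note: polyprediction (added in vowpalwabbit/prediction.h below) is a tagged
// union: init_as_* is only legal while the tag is unset, reset() must run
// before re-initializing as a different type, and the typed accessors THROW on
// a tag mismatch in debug builds. A minimal usage sketch:
//
//   polyprediction pred;         // starts as prediction_type_t::unset
//   pred.init_as_scalar(0.5f);   // tag becomes scalar
//   float v = pred.scalar();     // checked accessor
//   pred.reset();                // destroys the payload, back to unset
//   pred.init_as_multiclass(3);  // now holds a multiclass prediction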
+ if (ae->pred.get_type() != all.l->pred_type) + { + ae->pred.reset(); + switch (all.l->pred_type) + { + case (prediction_type_t::scalar): + ae->pred.init_as_scalar(); + break; + case (prediction_type_t::scalars): + ae->pred.init_as_scalars(); + break; + case (prediction_type_t::action_scores): + ae->pred.init_as_action_scores(); + break; + case (prediction_type_t::action_probs): + ae->pred.init_as_action_probs(); + break; + case (prediction_type_t::decision_scores): + ae->pred.init_as_decision_scores(); + break; + case (prediction_type_t::multiclass): + ae->pred.init_as_multiclass(); + break; + case (prediction_type_t::multilabels): + ae->pred.init_as_multilabels(); + break; + case (prediction_type_t::prob): + ae->pred.init_as_prob(); + break; + case (prediction_type_t::multiclassprobs): + ae->pred.init_as_multiclassprobs(); + break; + default: + THROW(to_string(all.l->pred_type) << " is not supported here"); + } + } } } // namespace VW @@ -768,7 +793,7 @@ namespace VW example* new_unused_example(vw& all) { example* ec = &get_unused_example(&all); - all.p->lp.default_label(&ec->l); + all.p->lp.default_label(ec->l); all.p->begin_parsed_examples++; ec->example_counter = (size_t)all.p->begin_parsed_examples.load(); return ec; } @@ -798,15 +823,15 @@ void add_constant_feature(vw& vw, example* ec) void add_label(example* ec, float label, float weight, float base) { - ec->l.simple.label = label; - ec->l.simple.initial = base; + ec->l.simple().label = label; + ec->l.simple().initial = base; ec->weight = weight; } example* import_example(vw& all, const std::string& label, primitive_feature_space* features, size_t len) { example* ret = &get_unused_example(&all); - all.p->lp.default_label(&ret->l); + all.p->lp.default_label(ret->l); if (label.length() > 0) parse_example_label(all, *ret, label); @@ -860,17 +885,16 @@ void releaseFeatureSpace(primitive_feature_space* features, size_t len) void parse_example_label(vw& all, example& ec, std::string label) { - v_array words = v_init(); + v_array words; tokenize(' ', label, words); - all.p->lp.parse_label(all.p, all.p->_shared_data, &ec.l, words); - words.clear(); - words.delete_v(); + all.p->lp.parse_label(all.p, all.p->_shared_data, ec.l, words); } void empty_example(vw& /*all*/, example& ec) { - for (features& fs : ec) fs.clear(); + for (features& fs : ec) + fs.clear(); ec.indices.clear(); ec.tag.clear(); @@ -920,30 +944,30 @@ namespace VW { example* get_example(parser* p) { return p->ready_parsed_examples.pop(); } -float get_topic_prediction(example* ec, size_t i) { return ec->pred.scalars[i]; } +float get_topic_prediction(example* ec, size_t i) { return ec->pred.scalars()[i]; } -float get_label(example* ec) { return ec->l.simple.label; } +float get_label(example* ec) { return ec->l.simple().label; } float get_importance(example* ec) { return ec->weight; } -float get_initial(example* ec) { return ec->l.simple.initial; } +float get_initial(example* ec) { return ec->l.simple().initial; } -float get_prediction(example* ec) { return ec->pred.scalar; } +float get_prediction(example* ec) { return ec->pred.scalar(); } -float get_cost_sensitive_prediction(example* ec) { return (float)ec->pred.multiclass; } +float get_cost_sensitive_prediction(example* ec) { return (float)ec->pred.multiclass(); } -v_array& get_cost_sensitive_prediction_confidence_scores(example* ec) { return ec->pred.scalars; } +v_array& get_cost_sensitive_prediction_confidence_scores(example* ec) { return ec->pred.scalars(); } uint32_t* get_multilabel_predictions(example* ec, size_t& len) {
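// Note: the accessors above now route through the checked polyprediction
// getters, so e.g. get_prediction() on an example whose pred slot is not a
// scalar throws in debug builds instead of silently reading the wrong union
// member. A hedged usage sketch against the simple-label path:
//
//   example* ec = VW::read_example(all, "1 |f a b");
//   all.learn(*ec);
//   float y = VW::get_prediction(ec);  // requires the pred tag to be scalar
//   VW::finish_example(all, *ec);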
MULTILABEL::labels labels = ec->pred.multilabels; + MULTILABEL::labels labels = ec->pred.multilabels(); len = labels.label_v.size(); return labels.label_v.begin(); } float get_action_score(example* ec, size_t i) { - ACTION_SCORE::action_scores scores = ec->pred.a_s; + ACTION_SCORE::action_scores scores = ec->pred.action_scores(); if (i < scores.size()) { @@ -955,7 +979,7 @@ float get_action_score(example* ec, size_t i) } } -size_t get_action_score_length(example* ec) { return ec->pred.a_s.size(); } +size_t get_action_score_length(example* ec) { return ec->pred.action_scores().size(); } size_t get_tag_length(example* ec) { return ec->tag.size(); } @@ -968,13 +992,12 @@ float get_confidence(example* ec) { return ec->confidence; } example* example_initializer::operator()(example* ex) { - memset(&ex->l, 0, sizeof(polylabel)); - ex->passthrough = nullptr; - ex->tag = v_init(); - ex->indices = v_init(); + new (&ex->l) polylabel(); + new (&ex->pred) polyprediction(); IGNORE_DEPRECATED_USAGE_START ex->in_use = true; IGNORE_DEPRECATED_USAGE_END + ex->passthrough = nullptr; memset(ex->feature_space.data(), 0, ex->feature_space.size() * sizeof(ex->feature_space[0])); return ex; } @@ -988,52 +1011,9 @@ namespace VW void start_parser(vw& all) { all.parse_thread = std::thread(main_parse_loop, &all); } } // namespace VW -// a copy of dealloc_example except that this does not call the example destructor -// Work to remove this is currently in progress -void cleanup_example(void(*delete_label)(void*), example& ec, void(*delete_prediction)(void*)) +VW_DEPRECATED("No longer needed. Use destructor.") +void free_parser(vw& /*all*/) { - if (delete_label) - delete_label(&ec.l); - - if (delete_prediction) - delete_prediction(&ec.pred); - - ec.tag.delete_v(); - - if (ec.passthrough) - { - delete ec.passthrough; - } - - ec.indices.delete_v(); -} - -void free_parser(vw& all) -{ - all.p->words.delete_v(); - - if (!all.ngram_strings.empty()) - all.p->gram_mask.delete_v(); - - io_buf* output = all.p->output; - if (output != nullptr) - { - output->finalname.delete_v(); - output->currentname.delete_v(); - } - - while (!all.p->example_pool.empty()) - { - example* temp = all.p->example_pool.get_object(); - cleanup_example(all.p->lp.delete_label, *temp, all.delete_prediction); - } - - while (all.p->ready_parsed_examples.size() != 0) - { - example* temp = all.p->ready_parsed_examples.pop(); - cleanup_example(all.p->lp.delete_label, *temp, all.delete_prediction); - } - all.p->counts.delete_v(); } namespace VW diff --git a/vowpalwabbit/parser.h b/vowpalwabbit/parser.h index ca374fc6020..63ed1aa8112 100644 --- a/vowpalwabbit/parser.h +++ b/vowpalwabbit/parser.h @@ -48,13 +48,6 @@ struct parser this->input = new io_buf{}; this->output = new io_buf{}; this->lp = simple_label; - - // Free parser must still be used for the following fields. - this->words = v_init(); - this->parse_name = v_init(); - this->gram_mask = v_init(); - this->ids = v_init(); - this->counts = v_init(); } ~parser() @@ -128,6 +121,9 @@ void set_done(vw& all); // source control functions void reset_source(vw& all, size_t numbits); +VW_DEPRECATED("no longer needed") void finalize_source(parser* source); void set_compressed(parser* par); + +VW_DEPRECATED("no longer needed. 
Use destructor") void free_parser(vw& all); diff --git a/vowpalwabbit/prediction.h b/vowpalwabbit/prediction.h new file mode 100644 index 00000000000..c810e82a8d0 --- /dev/null +++ b/vowpalwabbit/prediction.h @@ -0,0 +1,472 @@ +#pragma once + +/* +When a new prediction type needs to be added, the following actions must be taken: +- PREDICTION_TYPE is the type that will be used +- PREDICTION_NAME is the name to identify this prediction type +Steps: + 1. Add a new variant to prediction_type_t called PREDICTION_NAME + 2. Add the corresponding row to to_string: + TO_STRING_CASE(prediction_type_t::PREDICTION_NAME) + 3. Add the new type to the union: + PREDICTION_TYPE _PREDICTION_NAME; + 4. Add the corresponding row to polyprediction::copy_from + case (prediction_type_t::PREDICTION_NAME): + init_as_PREDICTION_NAME(other._PREDICTION_NAME); + break; + 5. Add the corresponding row to polyprediction::move_from + case (prediction_type_t::PREDICTION_NAME): + init_as_PREDICTION_NAME(std::move(other._PREDICTION_NAME)); + break; + 6. Add the corresponding row to polyprediction::reset + case (prediction_type_t::PREDICTION_NAME): + destruct(_PREDICTION_NAME); + break; + 7. Add another three methods that correspond to the new type according to this template + template + PREDICTION_TYPE& init_as_PREDICTION_NAME(Args&&... args) + { + ensure_is_type(prediction_type_t::unset); + new (&_PREDICTION_NAME) PREDICTION_TYPE(std::forward(args)...); + _tag = prediction_type_t::PREDICTION_NAME; + return _PREDICTION_NAME; + } + + const PREDICTION_TYPE& PREDICTION_NAME() const + { + ensure_is_type(prediction_type_t::PREDICTION_NAME); + return _PREDICTION_NAME; + } + + PREDICTION_TYPE& PREDICTION_NAME() + { + ensure_is_type(prediction_type_t::PREDICTION_NAME); + return _PREDICTION_NAME; + } +*/ + +enum class prediction_type_t : int +{ + unset, + scalar, + scalars, + action_scores, + multiclassprobs, + multiclass, + multilabels, + prob, + decision_scores, + action_probs, +}; + +#define TO_STRING_CASE(enum_type) \ + case enum_type: \ + return #enum_type; + +inline const char* to_string(prediction_type_t prediction_type) +{ + switch (prediction_type) + { + TO_STRING_CASE(prediction_type_t::unset) + TO_STRING_CASE(prediction_type_t::scalar) + TO_STRING_CASE(prediction_type_t::scalars) + TO_STRING_CASE(prediction_type_t::action_scores) + TO_STRING_CASE(prediction_type_t::action_probs) + TO_STRING_CASE(prediction_type_t::decision_scores) + TO_STRING_CASE(prediction_type_t::multiclass) + TO_STRING_CASE(prediction_type_t::multilabels) + TO_STRING_CASE(prediction_type_t::prob) + TO_STRING_CASE(prediction_type_t::multiclassprobs) + default: + return ""; + } +} + +struct polyprediction +{ + private: + union { + float _scalar; + v_array _scalars; // a sequence of scalar predictions + ACTION_SCORE::action_scores _action_scores; // a sequence of classes with scores. + ACTION_SCORE::action_scores _action_probs; // a sequence of classes with probs.
+ CCB::decision_scores_t _decision_scores; + uint32_t _multiclass; + MULTILABEL::labels _multilabels; + float _prob; // for --probabilities --csoaa_ldf=mc + v_array _multiclassprobs; + + }; + prediction_type_t _tag; + + inline void ensure_is_type(prediction_type_t type) const + { +#ifndef NDEBUG + if (_tag != type) + { + THROW("Expected type: " << to_string(type) << ", but found: " << to_string(_tag)); + } +#else + _UNUSED(type); +#endif + } + + template + void destruct(T& item) + { + item.~T(); + } + + // These two functions only differ by parameter + void copy_from(const polyprediction& other) + { + switch (other._tag) + { + case (prediction_type_t::unset): + break; + case (prediction_type_t::scalar): + init_as_scalar(other._scalar); + break; + case (prediction_type_t::scalars): + init_as_scalars(other._scalars); + break; + case (prediction_type_t::action_scores): + init_as_action_scores(other._action_scores); + break; + case (prediction_type_t::action_probs): + init_as_action_probs(other._action_probs); + break; + case (prediction_type_t::decision_scores): + init_as_decision_scores(other._decision_scores); + break; + case (prediction_type_t::multiclass): + init_as_multiclass(other._multiclass); + break; + case (prediction_type_t::multilabels): + init_as_multilabels(other._multilabels); + break; + case (prediction_type_t::prob): + init_as_prob(other._prob); + break; + case (prediction_type_t::multiclassprobs): + init_as_multiclassprobs(other._multiclassprobs); + break; + default:; + } + } + + void move_from(polyprediction&& other) + { + switch (other._tag) + { + case (prediction_type_t::unset): + break; + case (prediction_type_t::scalar): + init_as_scalar(std::move(other._scalar)); + break; + case (prediction_type_t::scalars): + init_as_scalars(std::move(other._scalars)); + break; + case (prediction_type_t::action_scores): + init_as_action_scores(std::move(other._action_scores)); + break; + case (prediction_type_t::action_probs): + init_as_action_probs(std::move(other._action_probs)); + break; + case (prediction_type_t::decision_scores): + init_as_decision_scores(std::move(other._decision_scores)); + break; + case (prediction_type_t::multiclass): + init_as_multiclass(std::move(other._multiclass)); + break; + case (prediction_type_t::multilabels): + init_as_multilabels(std::move(other._multilabels)); + break; + case (prediction_type_t::prob): + init_as_prob(std::move(other._prob)); + break; + case (prediction_type_t::multiclassprobs): + init_as_multiclassprobs(std::move(other._multiclassprobs)); + break; + default:; + } + } + + public: + polyprediction() { _tag = prediction_type_t::unset; // Perhaps we should memset here? + }; + ~polyprediction() { reset(); } + + polyprediction(polyprediction&& other) + { + _tag = prediction_type_t::unset; + move_from(std::move(other)); + other.reset(); + } + + polyprediction& operator=(polyprediction&& other) + { + reset(); + move_from(std::move(other)); + other.reset(); + return *this; + } + + polyprediction(const polyprediction& other) { + _tag = prediction_type_t::unset; + copy_from(other); + } + + polyprediction& operator=(const polyprediction& other) { + reset(); + copy_from(other); + return *this; + } + + prediction_type_t get_type() const { return _tag; } + + void reset() + { + switch (_tag) + { + case (prediction_type_t::unset): + // Nothing to do! Whatever was in here has already been destroyed. 
+ return; + case (prediction_type_t::scalar): + destruct(_scalar); + break; + case (prediction_type_t::scalars): + destruct(_scalars); + break; + case (prediction_type_t::action_scores): + destruct(_action_scores); + break; + case (prediction_type_t::action_probs): + destruct(_action_probs); + break; + case (prediction_type_t::decision_scores): + destruct(_decision_scores); + break; + case (prediction_type_t::multiclass): + destruct(_multiclass); + break; + case (prediction_type_t::multilabels): + destruct(_multilabels); + break; + case (prediction_type_t::prob): + destruct(_prob); + break; + case (prediction_type_t::multiclassprobs): + destruct(_multiclassprobs); + break; + default:; + } + + _tag = prediction_type_t::unset; + } + + template + float& init_as_scalar(Args&&... args) + { + ensure_is_type(prediction_type_t::unset); + new (&_scalar) float(std::forward(args)...); + _tag = prediction_type_t::scalar; + return _scalar; + } + + const float& scalar() const + { + ensure_is_type(prediction_type_t::scalar); + return _scalar; + } + + float& scalar() + { + ensure_is_type(prediction_type_t::scalar); + return _scalar; + } + + template + v_array& init_as_scalars(Args&&... args) + { + ensure_is_type(prediction_type_t::unset); + new (&_scalars) v_array(std::forward(args)...); + _tag = prediction_type_t::scalars; + return _scalars; + } + + const v_array& scalars() const + { + ensure_is_type(prediction_type_t::scalars); + return _scalars; + } + + v_array& scalars() + { + ensure_is_type(prediction_type_t::scalars); + return _scalars; + } + + template + ACTION_SCORE::action_scores& init_as_action_scores(Args&&... args) + { + ensure_is_type(prediction_type_t::unset); + new (&_action_scores) ACTION_SCORE::action_scores(std::forward(args)...); + _tag = prediction_type_t::action_scores; + return _action_scores; + } + + const ACTION_SCORE::action_scores& action_scores() const + { + ensure_is_type(prediction_type_t::action_scores); + return _action_scores; + } + + ACTION_SCORE::action_scores& action_scores() + { + ensure_is_type(prediction_type_t::action_scores); + return _action_scores; + } + + template + ACTION_SCORE::action_scores& init_as_action_probs(Args&&... args) + { + ensure_is_type(prediction_type_t::unset); + new (&_action_probs) ACTION_SCORE::action_scores(std::forward(args)...); + _tag = prediction_type_t::action_probs; + return _action_probs; + } + + const ACTION_SCORE::action_scores& action_probs() const + { + ensure_is_type(prediction_type_t::action_probs); + return _action_probs; + } + + ACTION_SCORE::action_scores& action_probs() + { + ensure_is_type(prediction_type_t::action_probs); + return _action_probs; + } + + template + CCB::decision_scores_t& init_as_decision_scores(Args&&... args) + { + ensure_is_type(prediction_type_t::unset); + new (&_decision_scores) CCB::decision_scores_t(std::forward(args)...); + _tag = prediction_type_t::decision_scores; + return _decision_scores; + } + + const CCB::decision_scores_t& decision_scores() const + { + ensure_is_type(prediction_type_t::decision_scores); + return _decision_scores; + } + + CCB::decision_scores_t& decision_scores() + { + ensure_is_type(prediction_type_t::decision_scores); + return _decision_scores; + } + + template + uint32_t& init_as_multiclass(Args&&... 
args) + { + ensure_is_type(prediction_type_t::unset); + new (&_multiclass) uint32_t(std::forward(args)...); + _tag = prediction_type_t::multiclass; + return _multiclass; + } + + const uint32_t& multiclass() const + { + ensure_is_type(prediction_type_t::multiclass); + return _multiclass; + } + + uint32_t& multiclass() + { + ensure_is_type(prediction_type_t::multiclass); + return _multiclass; + } + + template + MULTILABEL::labels& init_as_multilabels(Args&&... args) + { + ensure_is_type(prediction_type_t::unset); + new (&_multilabels) MULTILABEL::labels(std::forward(args)...); + _tag = prediction_type_t::multilabels; + return _multilabels; + } + + const MULTILABEL::labels& multilabels() const + { + ensure_is_type(prediction_type_t::multilabels); + return _multilabels; + } + + MULTILABEL::labels& multilabels() + { + ensure_is_type(prediction_type_t::multilabels); + return _multilabels; + } + + template + float& init_as_prob(Args&&... args) + { + ensure_is_type(prediction_type_t::unset); + new (&_prob) float(std::forward(args)...); + _tag = prediction_type_t::prob; + return _prob; + } + + const float& prob() const + { + ensure_is_type(prediction_type_t::prob); + return _prob; + } + + float& prob() + { + ensure_is_type(prediction_type_t::prob); + return _prob; + } + + template + v_array& init_as_multiclassprobs(Args&&... args) + { + ensure_is_type(prediction_type_t::unset); + new (&_multiclassprobs) v_array(std::forward(args)...); + _tag = prediction_type_t::multiclassprobs; + return _multiclassprobs; + } + + const v_array& multiclassprobs() const + { + ensure_is_type(prediction_type_t::multiclassprobs); + return _multiclassprobs; + } + + v_array& multiclassprobs() + { + ensure_is_type(prediction_type_t::multiclassprobs); + return _multiclassprobs; + } + + // TODO: make this more generic through traits and type comparisons. + void reinterpret(prediction_type_t type) + { + // Currently the only valid reinterpret is between action scores and probs, or itself. + if((type == prediction_type_t::action_probs && _tag == prediction_type_t::action_scores) + || (type == prediction_type_t::action_scores && _tag == prediction_type_t::action_probs) + || type == _tag) + { + _tag = type; + } + else + { + THROW("Illegal reinterpret. 
Tried to reinterpret as " << to_string(type) << ", but contains: " << to_string(_tag)); + } + } +}; diff --git a/vowpalwabbit/print.cc b/vowpalwabbit/print.cc index ec5163d189b..aec953e3f5d 100644 --- a/vowpalwabbit/print.cc +++ b/vowpalwabbit/print.cc @@ -24,7 +24,7 @@ void print_feature(vw& /* all */, float value, uint64_t index) void learn(print& p, LEARNER::base_learner&, example& ec) { - label_data& ld = ec.l.simple; + label_data& ld = ec.l.simple(); if (ld.label != FLT_MAX) { cout << ld.label << " "; @@ -61,5 +61,6 @@ LEARNER::base_learner* print_setup(options_i& options, vw& all) all.weights.stride_shift(0); LEARNER::learner& ret = init_learner(p, learn, learn, 1); + ret.label_type = label_type_t::simple; return make_base(ret); } diff --git a/vowpalwabbit/recall_tree.cc b/vowpalwabbit/recall_tree.cc index 431ada1eea8..d6ce4cfd191 100644 --- a/vowpalwabbit/recall_tree.cc +++ b/vowpalwabbit/recall_tree.cc @@ -52,7 +52,6 @@ struct node , n(0) , entropy(0) , passes(1) - , preds(v_init()) { } }; @@ -72,12 +71,6 @@ struct recall_tree float bern_hyper; bool randomized_routing; - - ~recall_tree() - { - for (auto& node : nodes) node.preds.delete_v(); - nodes.delete_v(); - } }; float to_prob(float x) @@ -121,11 +114,12 @@ void init_tree(recall_tree& b) b.max_routers = routers_used; } +// TODO replace with std::find node_pred* find(recall_tree& b, uint32_t cn, example& ec) { node_pred* ls; - for (ls = b.nodes[cn].preds.begin(); ls != b.nodes[cn].preds.end() && ls->label != ec.l.multi.label; ++ls) + for (ls = b.nodes[cn].preds.begin(); ls != b.nodes[cn].preds.end() && ls->label != ec.l.multi().label; ++ls) ; return ls; @@ -137,7 +131,7 @@ node_pred* find_or_create(recall_tree& b, uint32_t cn, example& ec) if (ls == b.nodes[cn].preds.end()) { - node_pred newls(ec.l.multi.label); + node_pred newls(ec.l.multi().label); b.nodes[cn].preds.push_back(newls); ls = b.nodes[cn].preds.end() - 1; } @@ -251,13 +245,16 @@ void remove_node_id_feature(recall_tree& /* b */, uint32_t /* cn */, example& ec uint32_t oas_predict(recall_tree& b, single_learner& base, uint32_t cn, example& ec) { - MULTICLASS::label_t mc = ec.l.multi; - uint32_t save_pred = ec.pred.multiclass; + MULTICLASS::label_t mc = ec.l.multi(); + uint32_t save_pred = ec.pred.multiclass(); uint32_t amaxscore = 0; add_node_id_feature(b, cn, ec); - ec.l.simple = {FLT_MAX, 0.f, 0.f}; + ec.l.reset(); + ec.l.init_as_simple() = {FLT_MAX, 0.f, 0.f}; + ec.pred.reset(); + ec.pred.init_as_scalar(); float maxscore = std::numeric_limits::lowest(); for (node_pred* ls = b.nodes[cn].preds.begin(); @@ -273,8 +270,10 @@ uint32_t oas_predict(recall_tree& b, single_learner& base, uint32_t cn, example& remove_node_id_feature(b, cn, ec); - ec.l.multi = mc; - ec.pred.multiclass = save_pred; + ec.l.reset(); + ec.l.init_as_multi() = mc; + ec.pred.reset(); + ec.pred.init_as_multiclass() = save_pred; return amaxscore; } @@ -284,7 +283,7 @@ bool is_candidate(recall_tree& b, uint32_t cn, example& ec) for (node_pred* ls = b.nodes[cn].preds.begin(); ls != b.nodes[cn].preds.end() && ls < b.nodes[cn].preds.begin() + b.max_candidates; ++ls) { - if (ls->label == ec.l.multi.label) + if (ls->label == ec.l.multi().label) return true; } @@ -308,10 +307,12 @@ bool stop_recurse_check(recall_tree& b, uint32_t parent, uint32_t child) predict_type predict_from(recall_tree& b, single_learner& base, example& ec, uint32_t cn) { - MULTICLASS::label_t mc = ec.l.multi; - uint32_t save_pred = ec.pred.multiclass; - - ec.l.simple = {FLT_MAX, 0.f, 0.f}; + MULTICLASS::label_t mc = ec.l.multi(); + 
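// Note: this save -> reset/re-init -> restore dance around the label and
// prediction unions recurs throughout the tree reductions. A hedged RAII-style
// sketch (hypothetical helper, not part of this patch) that restores the
// multiclass state on scope exit:
//
//   struct multiclass_state_guard
//   {
//     example& _ec;
//     MULTICLASS::label_t _label;
//     uint32_t _pred;
//     explicit multiclass_state_guard(example& ec)
//         : _ec(ec), _label(ec.l.multi()), _pred(ec.pred.multiclass())
//     {
//       _ec.l.reset();  // caller re-inits as simple for the base learner
//       _ec.pred.reset();
//     }
//     ~multiclass_state_guard()
//     {
//       _ec.l.reset();
//       _ec.l.init_as_multi(_label);
//       _ec.pred.reset();
//       _ec.pred.init_as_multiclass(_pred);
//     }
//   };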
uint32_t save_pred = ec.pred.multiclass(); + ec.l.reset(); + ec.l.init_as_simple() = {FLT_MAX, 0.f, 0.f}; + ec.pred.reset(); + ec.pred.init_as_scalar(); while (b.nodes[cn].internal) { base.predict(ec, b.nodes[cn].base_router); @@ -324,8 +325,10 @@ predict_type predict_from(recall_tree& b, single_learner& base, example& ec, uin cn = newcn; } - ec.l.multi = mc; - ec.pred.multiclass = save_pred; + ec.l.reset(); + ec.l.init_as_multi() = mc; + ec.pred.reset(); + ec.pred.init_as_multiclass() = save_pred; return predict_type(cn, oas_predict(b, base, cn, ec)); } @@ -334,13 +337,13 @@ void predict(recall_tree& b, single_learner& base, example& ec) { predict_type pred = predict_from(b, base, ec, 0); - ec.pred.multiclass = pred.class_prediction; + ec.pred.multiclass() = pred.class_prediction; } float train_node(recall_tree& b, single_learner& base, example& ec, uint32_t cn) { - MULTICLASS::label_t mc = ec.l.multi; - uint32_t save_pred = ec.pred.multiclass; + MULTICLASS::label_t mc = ec.l.multi(); + uint32_t save_pred = ec.pred.multiclass(); // minimize entropy // better than maximize expected likelihood, and the proofs go through :) @@ -355,7 +358,10 @@ float train_node(recall_tree& b, single_learner& base, example& ec, uint32_t cn) float route_label = delta_left < delta_right ? -1.f : 1.f; float imp_weight = fabs((float)(delta_left - delta_right)); - ec.l.simple = {route_label, imp_weight, 0.}; + ec.l.reset(); + ec.l.init_as_simple() = {route_label, imp_weight, 0.}; + ec.pred.reset(); + ec.pred.init_as_scalar(); base.learn(ec, b.nodes[cn].base_router); // TODO: using the updated routing seems to help @@ -363,10 +369,12 @@ float train_node(recall_tree& b, single_learner& base, example& ec, uint32_t cn) // TODO: (doesn't play well with link function) base.predict(ec, b.nodes[cn].base_router); - float save_scalar = ec.pred.scalar; + float save_scalar = ec.pred.scalar(); - ec.l.multi = mc; - ec.pred.multiclass = save_pred; + ec.l.reset(); + ec.l.init_as_multi() = mc; + ec.pred.reset(); + ec.pred.init_as_multiclass() = save_pred; return save_scalar; } @@ -375,7 +383,7 @@ void learn(recall_tree& b, single_learner& base, example& ec) { predict(b, base, ec); - if (b.all->training && ec.l.multi.label != (uint32_t)-1) // if training the tree + if (b.all->training && ec.l.multi().label != (uint32_t)-1) // if training the tree { uint32_t cn = 0; @@ -404,14 +412,17 @@ void learn(recall_tree& b, single_learner& base, example& ec) if (is_candidate(b, cn, ec)) { - MULTICLASS::label_t mc = ec.l.multi; - uint32_t save_pred = ec.pred.multiclass; + MULTICLASS::label_t mc = ec.l.multi(); + uint32_t save_pred = ec.pred.multiclass(); add_node_id_feature(b, cn, ec); - ec.l.simple = {1.f, 1.f, 0.f}; + ec.l.reset(); + ec.l.init_as_simple() = {1.f, 1.f, 0.f}; + ec.pred.reset(); + ec.pred.init_as_scalar(); base.learn(ec, b.max_routers + mc.label - 1); - ec.l.simple = {-1.f, 1.f, 0.f}; + ec.l.simple() = {-1.f, 1.f, 0.f}; for (node_pred* ls = b.nodes[cn].preds.begin(); ls != b.nodes[cn].preds.end() && ls < b.nodes[cn].preds.begin() + b.max_candidates; ++ls) @@ -422,8 +433,10 @@ void learn(recall_tree& b, single_learner& base, example& ec) remove_node_id_feature(b, cn, ec); - ec.l.multi = mc; - ec.pred.multiclass = save_pred; + ec.l.reset(); + ec.l.init_as_multi() = mc; + ec.pred.reset(); + ec.pred.init_as_multiclass() = save_pred; } } } @@ -534,6 +547,6 @@ base_learner* recall_tree_setup(options_i& options, vw& all) learner& l = init_multiclass_learner( tree, as_singleline(setup_base(options, all)), learn, predict, all.p, 
tree->max_routers + tree->k);
   l.set_save_load(save_load_tree);
-
+  l.label_type = label_type_t::multi;
   return make_base(l);
 }
diff --git a/vowpalwabbit/scorer.cc b/vowpalwabbit/scorer.cc
index f755d726b3f..1250886c975 100644
--- a/vowpalwabbit/scorer.cc
+++ b/vowpalwabbit/scorer.cc
@@ -17,16 +17,21 @@ struct scorer
 
 template <bool is_learn, float (*link)(float)>
 void predict_or_learn(scorer& s, LEARNER::single_learner& base, example& ec)
 {
-  s.all->set_minmax(s.all->sd, ec.l.simple.label);
-  if (is_learn && ec.l.simple.label != FLT_MAX && ec.weight > 0)
+  // LDA uses this reduction but explicitly has no label, so we must check the label type before using it.
+  const float simple_label = ec.l.get_type() == label_type_t::simple ? ec.l.simple().label : 0.f;
+
+  s.all->set_minmax(s.all->sd, simple_label);
+  if (is_learn && simple_label != FLT_MAX && ec.weight > 0)
     base.learn(ec);
   else
     base.predict(ec);
 
-  if (ec.weight > 0 && ec.l.simple.label != FLT_MAX)
-    ec.loss = s.all->loss->getLoss(s.all->sd, ec.pred.scalar, ec.l.simple.label) * ec.weight;
+  // TODO: LDA returns the scalars prediction type - what should we do here?
+
+  if (ec.weight > 0 && simple_label != FLT_MAX)
+    ec.loss = s.all->loss->getLoss(s.all->sd, ec.pred.scalar(), simple_label) * ec.weight;
 
-  ec.pred.scalar = link(ec.pred.scalar);
+  ec.pred.scalar() = link(ec.pred.scalar());
 }
 
 template <float (*link)(float)>
@@ -34,12 +39,12 @@ inline void multipredict(scorer&, LEARNER::single_learner& base, example& ec, size_t count,
     polyprediction* pred, bool finalize_predictions)
 {
   base.multipredict(ec, 0, count, pred, finalize_predictions);  // TODO: need to thread step through???
-  for (size_t c = 0; c < count; c++) pred[c].scalar = link(pred[c].scalar);
+  for (size_t c = 0; c < count; c++) pred[c].scalar() = link(pred[c].scalar());
 }
 
 void update(scorer& s, LEARNER::single_learner& base, example& ec)
 {
-  s.all->set_minmax(s.all->sd, ec.l.simple.label);
+  s.all->set_minmax(s.all->sd, ec.l.simple().label);
   base.update(ec);
 }
 
@@ -65,7 +70,9 @@ LEARNER::base_learner* scorer_setup(options_i& options, vw& all)
       .help("Specify the link function: identity, logistic, glf1 or poisson"));
   options.add_and_parse(new_options);
 
-  // This always returns a base_learner.
+  // This always returns a base_learner, except in the case of LDA, which does not use the scorer.
+  if (options.was_supplied("lda"))
+    return nullptr;
 
   s->all = &all;
 
@@ -96,6 +103,7 @@ LEARNER::base_learner* scorer_setup(options_i& options, vw& all)
   l->set_multipredict(multipredict_f);
   l->set_update(update);
 
+  l->label_type = base->label_type;
   all.scorer = LEARNER::as_singleline(l);
 
   return make_base(*all.scorer);
diff --git a/vowpalwabbit/search.cc b/vowpalwabbit/search.cc
index ceb0b32c2ae..cd733d8683d 100644
--- a/vowpalwabbit/search.cc
+++ b/vowpalwabbit/search.cc
@@ -110,8 +110,7 @@ struct action_repr
   {
     if (_repr != nullptr)
     {
-      repr = new features();
-      repr->deep_copy_from(*_repr);
+      repr = new features(*_repr);
     }
   }
   action_repr(action _a) : a(_a), repr(nullptr) {}
@@ -207,7 +206,7 @@ struct search_private
   action learn_oracle_action;  // store an oracle action for debugging purposes
   features last_action_repr;
 
-  polylabel* allowed_actions_cache;
+  polylabel allowed_actions_cache;
 
   size_t loss_declared_cnt;  // how many times did run declare any loss (implicitly or explicitly)?
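  // Note: allowed_actions_cache is now held by value. The polylabel tagged
  // union constructs and destroys its own active member, so the
  // calloc_or_throw/free pair and the per-branch cb/cs cost cleanup that the
  // old pointer member required disappear from ~search() below.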
v_array train_trajectory; // the training trajectory @@ -305,7 +304,6 @@ void clear_memo_foreach_action(search_private& priv) for (size_t i = 0; i < priv.memo_foreach_action.size(); i++) if (priv.memo_foreach_action[i]) { - priv.memo_foreach_action[i]->delete_v(); delete priv.memo_foreach_action[i]; } priv.memo_foreach_action.clear(); @@ -319,60 +317,22 @@ search::~search() { search_private& priv = *this->priv; - priv._random_state.~shared_ptr(); delete priv.truth_string; delete priv.pred_string; delete priv.bad_string_stream; - priv.rawOutputString.~basic_string(); - priv.test_action_sequence.~vector(); - priv.dat_new_feature_audit_ss.~basic_stringstream(); - priv.neighbor_features.delete_v(); - priv.timesteps.delete_v(); - if (priv.cb_learner) - priv.learn_losses.cb.costs.delete_v(); - else - priv.learn_losses.cs.costs.delete_v(); - if (priv.cb_learner) - priv.gte_label.cb.costs.delete_v(); - else - priv.gte_label.cs.costs.delete_v(); - - priv.condition_on_actions.delete_v(); - priv.learn_allowed_actions.delete_v(); - priv.ldf_test_label.costs.delete_v(); - priv.last_action_repr.~features(); - priv.active_uncertainty.delete_v(); - for (size_t i = 0; i < priv.active_known.size(); i++) priv.active_known[i].delete_v(); - priv.active_known.delete_v(); - - if (priv.cb_learner) - priv.allowed_actions_cache->cb.costs.delete_v(); - else - priv.allowed_actions_cache->cs.costs.delete_v(); - - priv.train_trajectory.delete_v(); - - for (auto& ar : priv.ptag_to_action) delete ar.repr; - priv.ptag_to_action.delete_v(); - clear_memo_foreach_action(priv); - priv.memo_foreach_action.delete_v(); - // destroy copied examples if we needed them - if (!priv.examples_dont_change) + delete priv.rawOutputStringStream; + for (auto& ar : priv.ptag_to_action) { - void (*delete_label)(void*) = priv.is_ldf ? CS::cs_label.delete_label : MC::mc_label.delete_label; - for (example& ec : priv.learn_ec_copy) VW::dealloc_example(delete_label, ec); - priv.learn_ec_copy.delete_v(); + if (ar.repr != nullptr) + { + delete ar.repr; + cdbg << "delete_v" << endl; + } } - priv.learn_condition_on_names.delete_v(); - priv.learn_condition_on.delete_v(); - - priv.learn_condition_on_act.delete_v(); - priv.cache_hash_map.~unordered_map(); + clear_memo_foreach_action(priv); - free(priv.allowed_actions_cache); - delete priv.rawOutputStringStream; + this->priv->~search_private(); } free(this->priv); } @@ -724,8 +684,13 @@ void reset_search_structure(search_private& priv) if (priv.beta > 1) priv.beta = 1; } - - for (auto& ar : priv.ptag_to_action) delete ar.repr; + for (auto& ar : priv.ptag_to_action) + { + if (ar.repr != nullptr) + { + delete ar.repr; + } + } priv.ptag_to_action.clear(); if (!priv.cb_learner) // was: if rollout_all_actions @@ -796,8 +761,8 @@ void add_example_conditioning(search_private& priv, example& ec, size_t conditio uint64_t extra_offset = 0; if (priv.is_ldf) - if (ec.l.cs.costs.size() > 0) - extra_offset = 3849017 * ec.l.cs.costs[0].class_index; + if (ec.l.get_type() == label_type_t::cs && ec.l.cs().costs.size() > 0) + extra_offset = 3849017 * ec.l.cs().costs[0].class_index; size_t I = condition_on_cnt; size_t N = std::max(priv.acset.max_bias_ngram_length, priv.acset.max_quad_ngram_length); @@ -891,40 +856,43 @@ void del_example_conditioning(search_private& priv, example& ec) del_features_in_top_namespace(priv, ec, conditioning_namespace); } -inline size_t cs_get_costs_size(bool isCB, polylabel& ld) { return isCB ? 
ld.cb.costs.size() : ld.cs.costs.size(); } +inline size_t cs_get_costs_size(bool isCB, polylabel& ld) +{ + return isCB ? ld.cb().costs.size() : ld.cs().costs.size(); +} inline uint32_t cs_get_cost_index(bool isCB, polylabel& ld, size_t k) { - return isCB ? ld.cb.costs[k].action : ld.cs.costs[k].class_index; + return isCB ? ld.cb().costs[k].action : ld.cs().costs[k].class_index; } inline float cs_get_cost_partial_prediction(bool isCB, polylabel& ld, size_t k) { - return isCB ? ld.cb.costs[k].partial_prediction : ld.cs.costs[k].partial_prediction; + return isCB ? ld.cb().costs[k].partial_prediction : ld.cs().costs[k].partial_prediction; } inline void cs_set_cost_loss(bool isCB, polylabel& ld, size_t k, float val) { if (isCB) - ld.cb.costs[k].cost = val; + ld.cb().costs[k].cost = val; else - ld.cs.costs[k].x = val; + ld.cs().costs[k].x = val; } inline void cs_costs_erase(bool isCB, polylabel& ld) { if (isCB) - ld.cb.costs.clear(); + ld.cb().costs.clear(); else - ld.cs.costs.clear(); + ld.cs().costs.clear(); } inline void cs_costs_resize(bool isCB, polylabel& ld, size_t new_size) { if (isCB) - ld.cb.costs.resize(new_size); + ld.cb().costs.resize(new_size); else - ld.cs.costs.resize(new_size); + ld.cs().costs.resize(new_size); } inline void cs_cost_push_back(bool isCB, polylabel& ld, uint32_t index, float value) @@ -932,12 +900,12 @@ inline void cs_cost_push_back(bool isCB, polylabel& ld, uint32_t index, float va if (isCB) { CB::cb_class cost = {value, index, 0., 0.}; - ld.cb.costs.push_back(cost); + ld.cb().costs.push_back(cost); } else { CS::wclass cost = {value, index, 0., 0.}; - ld.cs.costs.push_back(cost); + ld.cs().costs.push_back(cost); } } @@ -945,7 +913,7 @@ polylabel& allowed_actions_to_ld(search_private& priv, size_t ec_cnt, const acti size_t allowed_actions_cnt, const float* allowed_actions_cost) { bool isCB = priv.cb_learner; - polylabel& ld = *priv.allowed_actions_cache; + polylabel& ld = priv.allowed_actions_cache; uint32_t num_costs = (uint32_t)cs_get_costs_size(isCB, ld); if (priv.is_ldf) // LDF version easier @@ -1150,9 +1118,9 @@ action choose_oracle_action(search_private& priv, size_t ec_cnt, const action* o if (need_memo_foreach_action(priv) && (priv.state == INIT_TRAIN)) { v_array* this_cache = new v_array(); - *this_cache = v_init(); // TODO we don't really need to construct this polylabel - polylabel l = allowed_actions_to_ld(priv, 1, allowed_actions, allowed_actions_cnt, allowed_actions_cost); + polylabel l = + std::move(allowed_actions_to_ld(priv, 1, allowed_actions, allowed_actions_cnt, allowed_actions_cost)); size_t K = cs_get_costs_size(priv.cb_learner, l); for (size_t k = 0; k < K; k++) { @@ -1163,6 +1131,7 @@ action choose_oracle_action(search_private& priv, size_t ec_cnt, const action* o assert(priv.memo_foreach_action.size() == priv.meta_t + priv.t - 1); priv.memo_foreach_action.push_back(this_cache); cdbg << "memo_foreach_action[" << priv.meta_t + priv.t - 1 << "] = " << this_cache << " from oracle" << endl; + priv.allowed_actions_cache = std::move(l); } return a; } @@ -1173,22 +1142,23 @@ action single_prediction_notLDF(search_private& priv, example& ec, int policy, c // appropriate cost for that action { vw& all = *priv.all; - polylabel old_label = ec.l; - bool need_partial_predictions = need_memo_foreach_action(priv) || + auto old_label = std::move(ec.l); + ec.l.reset(); + const bool need_partial_predictions = need_memo_foreach_action(priv) || (priv.metaoverride && priv.metaoverride->_foreach_action) || (override_action != (action)-1) || priv.active_csoaa; 
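      // Label save/restore around the base predict call: ec.l was moved out
      // and reset above; it now receives an allowed-actions (cs) label for
      // the prediction, and the original label is moved back at the end of
      // the function. reset() must precede init_as_*, since init_as_*
      // requires an unset tag.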
if ((allowed_actions_cnt > 0) || need_partial_predictions)
-    ec.l = allowed_actions_to_ld(priv, 1, allowed_actions, allowed_actions_cnt, allowed_actions_cost);
+    ec.l = std::move(allowed_actions_to_ld(priv, 1, allowed_actions, allowed_actions_cnt, allowed_actions_cost));
   else
-    ec.l.cs = priv.empty_cs_label;
+    ec.l.init_as_cs() = priv.empty_cs_label;
 
   cdbg << "allowed_actions_cnt=" << allowed_actions_cnt << ", ec.l = [";
-  for (size_t i = 0; i < ec.l.cs.costs.size(); i++)
-    cdbg << ' ' << ec.l.cs.costs[i].class_index << ':' << ec.l.cs.costs[i].x;
+  for (size_t i = 0; i < ec.l.cs().costs.size(); i++)
+    cdbg << ' ' << ec.l.cs().costs[i].class_index << ':' << ec.l.cs().costs[i].x;
   cdbg << " ]" << endl;
 
   as_singleline(priv.base_learner)->predict(ec, policy);
-  uint32_t act = ec.pred.multiclass;
+  uint32_t act = ec.pred.multiclass();
   cdbg << "a=" << act << " from";
   if (allowed_actions)
   {
@@ -1215,7 +1185,6 @@
     if (need_memo_foreach_action(priv) && (override_action == (action)-1))
     {
       this_cache = new v_array<action_cache>();
-      *this_cache = v_init<action_cache>();
     }
     for (size_t k = 0; k < K; k++)
     {
@@ -1264,22 +1233,24 @@
     while (priv.active_known.size() <= cur_t)
     {
       priv.active_known.push_back(v_array<std::pair<CS::wclass, bool>>());
-      priv.active_known[priv.active_known.size() - 1] = v_init<std::pair<CS::wclass, bool>>();
+      priv.active_known[priv.active_known.size() - 1] = v_array<std::pair<CS::wclass, bool>>();
       cdbg << "active_known length now " << priv.active_known.size() << endl;
     }
     priv.active_known[cur_t].clear();
-    assert(ec.l.cs.costs.size() > 0);
-    for (size_t k = 0; k < ec.l.cs.costs.size(); k++)
+    assert(ec.l.cs().costs.size() > 0);
+    for (size_t k = 0; k < ec.l.cs().costs.size(); k++)
    {
-      /* priv.active_known[cur_t].push_back( ec.l.cs.costs[k].pred_is_certain
-                                             ? ec.l.cs.costs[k].partial_prediction
+      /* priv.active_known[cur_t].push_back( ec.l.cs().costs[k].pred_is_certain
+                                             ?
ec.l.cs().costs[k].partial_prediction
                                             : FLT_MAX );
         cdbg << "active_known[" << cur_t << "][" << (priv.active_known[cur_t].size() -
-      1) << "] = certain=" << ec.l.cs.costs[k].pred_is_certain << ", cost=" << ec.l.cs.costs[k].partial_prediction <<
+      1) << "] = certain=" << ec.l.cs().costs[k].pred_is_certain << ", cost=" <<
+      ec.l.cs().costs[k].partial_prediction <<
       "}" << endl; */
-      CS::wclass& wc = ec.l.cs.costs[k];
+      CS::wclass& wc = ec.l.cs().costs[k];
       // Get query_needed from pred
-      bool query_needed = v_array_contains(ec.pred.multilabels.label_v, wc.class_index);
+      bool query_needed = std::find(ec.pred.multilabels().label_v.cbegin(), ec.pred.multilabels().label_v.cend(),
+          wc.class_index) != ec.pred.multilabels().label_v.cend();
       std::pair<CS::wclass, bool> p = {wc, query_needed};
       // Push into active_known[cur_t] with wc
       priv.active_known[cur_t].push_back(p);
@@ -1287,8 +1258,8 @@ action single_prediction_notLDF(search_private& priv, example& ec, int policy, c
      // << ':' << wc.x << " pp=" << wc.partial_prediction << " query_needed=" << wc.query_needed << " max_pred=" <<
      // wc.max_pred << " min_pred=" << wc.min_pred << " is_range_overlapped=" << wc.is_range_overlapped << "
      // is_range_large=" << wc.is_range_large << endl;
-      // query_needed=" << ec.l.cs.costs[k].query_needed << ", cost=" << ec.l.cs.costs[k].partial_prediction << "}" <<
-      // endl;
+      // query_needed=" << ec.l.cs().costs[k].query_needed << ", cost=" << ec.l.cs().costs[k].partial_prediction << "}"
+      // << endl;
     }
   }
 
@@ -1306,7 +1277,12 @@ action single_prediction_notLDF(search_private& priv, example& ec, int policy, c
     all.print_text_by_ref(all.raw_prediction, priv.rawOutputStringStream->str(), ec.tag);
   }
 
-  ec.l = old_label;
+  if ((allowed_actions_cnt > 0) || need_partial_predictions)
+  {
+    priv.allowed_actions_cache = std::move(ec.l);
+  }
+
+  ec.l = std::move(old_label);
 
   priv.total_predictions_made++;
   priv.num_features += ec.num_features;
@@ -1321,7 +1297,7 @@ action single_prediction_LDF(search_private& priv, example* ecs, size_t ec_cnt,
   bool need_partial_predictions = need_memo_foreach_action(priv) ||
       (priv.metaoverride && priv.metaoverride->_foreach_action) || (override_action != (action)-1);
 
-  CS::cs_label.default_label(&priv.ldf_test_label);
+  CS::default_label(priv.ldf_test_label);
   CS::wclass wc = {0., 1, 0., 0.};
   priv.ldf_test_label.costs.push_back(wc);
 
@@ -1335,7 +1311,6 @@
   if (need_partial_predictions)
   {
     this_cache = new v_array<action_cache>();
-    *this_cache = v_init<action_cache>();
   }
 
   for (action a = (uint32_t)start_K; a < ec_cnt; a++)
   {
     if (start_K > 0)
       LabelDict::add_example_namespaces_from_example(ecs[a], ecs[0]);
 
-    polylabel old_label = ecs[a].l;
-    ecs[a].l.cs = priv.ldf_test_label;
+    polylabel old_label = std::move(ecs[a].l);
+    ecs[a].l.reset();
+    ecs[a].l.init_as_cs() = priv.ldf_test_label;
+    if (ecs[a].pred.get_type() == prediction_type_t::unset)
+    {
+      ecs[a].pred.init_as_multiclass();
+    }
 
     multi_ex tmp;
     uint64_t old_offset = ecs[a].ft_offset;
@@ -1371,7 +1351,7 @@
       this_cache->push_back(action_cache(0., a, false, ecs[a].partial_prediction));
 
     priv.num_features += ecs[a].num_features;
-    ecs[a].l = old_label;
+    ecs[a].l = std::move(old_label);
     if (start_K > 0)
       LabelDict::del_example_namespaces_from_example(ecs[a], ecs[0]);
   }
@@ -1394,7 +1374,6 @@ action single_prediction_LDF(search_private& priv, example*
ecs, size_t ec_cnt, priv.memo_foreach_action.push_back(this_cache); else { - this_cache->delete_v(); delete this_cache; } } @@ -1504,15 +1483,16 @@ void generate_training_example(search_private& priv, polylabel& losses, float we if (priv.cb_learner) { if (min_loss == FLT_MAX) - for (size_t i = 0; i < losses.cb.costs.size(); i++) min_loss = std::min(min_loss, losses.cb.costs[i].cost); - for (size_t i = 0; i < losses.cb.costs.size(); i++) losses.cb.costs[i].cost = losses.cb.costs[i].cost - min_loss; + for (size_t i = 0; i < losses.cb().costs.size(); i++) min_loss = std::min(min_loss, losses.cb().costs[i].cost); + for (size_t i = 0; i < losses.cb().costs.size(); i++) + losses.cb().costs[i].cost = losses.cb().costs[i].cost - min_loss; } else { if (min_loss == FLT_MAX) - for (size_t i = 0; i < losses.cs.costs.size(); i++) min_loss = std::min(min_loss, losses.cs.costs[i].x); - for (size_t i = 0; i < losses.cs.costs.size(); i++) - losses.cs.costs[i].x = (losses.cs.costs[i].x - min_loss) * weight; + for (size_t i = 0; i < losses.cs().costs.size(); i++) min_loss = std::min(min_loss, losses.cs().costs[i].x); + for (size_t i = 0; i < losses.cs().costs.size(); i++) + losses.cs().costs[i].x = (losses.cs().costs[i].x - min_loss) * weight; } // std::cerr << "losses = ["; for (size_t i=0; iaudit, priv.learn_ec_copy.begin() + i, ecs + i, label_size, label_copy_fn); + for (size_t i = 0; i < ec_cnt; i++) priv.learn_ec_copy[i] = ecs[i]; priv.learn_ec_ref = priv.learn_ec_copy.begin(); } @@ -1908,8 +1884,8 @@ action search_predict(search_private& priv, example* ecs, size_t ec_cnt, ptag my allowed_actions_to_label(priv, ec_cnt, allowed_actions, allowed_actions_cnt, allowed_actions_cost, oracle_actions, oracle_actions_cnt, priv.gte_label); cdbg << "priv.gte_label = ["; - for (size_t i = 0; i < priv.gte_label.cs.costs.size(); i++) - cdbg << ' ' << priv.gte_label.cs.costs[i].class_index << ':' << priv.gte_label.cs.costs[i].x; + for (size_t i = 0; i < priv.gte_label.cs().costs.size(); i++) + cdbg << ' ' << priv.gte_label.cs().costs[i].class_index << ':' << priv.gte_label.cs().costs[i].x; cdbg << " ]" << endl; priv.learn_ec_ref = ecs; @@ -2002,7 +1978,7 @@ void get_training_timesteps(search_private& priv, v_array& timesteps) timesteps.push_back(priv.active_uncertainty[i].second - 1); /* float k = (float)priv.total_examples_generated; - priv.ec_seq[t]->revert_weight = priv.all->loss->getRevertingWeight(priv.all->sd, priv.ec_seq[t].pred.scalar, + priv.ec_seq[t]->revert_weight = priv.all->loss->getRevertingWeight(priv.all->sd, priv.ec_seq[t].pred.scalar(), priv.all->eta / powf(k, priv.all->power_t)); float importance = query_decision(active_str, *priv.ec_seq[t], k); if (importance > 0.) 
timesteps.push_back(pair(0,t)); */ @@ -2045,7 +2021,7 @@ void get_training_timesteps(search_private& priv, v_array& timesteps) while ((timesteps.size() < (size_t)priv.subsample_timesteps) && (timesteps.size() < priv.T)) { size_t t = (size_t)(priv._random_state->get_and_update_random() * (float)priv.T); - if (!v_array_contains(timesteps, t)) + if (std::find(timesteps.cbegin(), timesteps.cend(), t) == timesteps.cend()) timesteps.push_back(t); } std::sort(timesteps.begin(), timesteps.end(), cmp_size_t); @@ -2065,7 +2041,6 @@ struct final_item void free_final_item(final_item* p) { - p->prefix->delete_v(); delete p->prefix; delete p; } @@ -2171,7 +2146,7 @@ void advance_from_known_actions(search_private& priv) priv.active_known[t][priv.learn_a_idx], true); */ - priv.learn_losses.cs.costs.push_back(priv.active_known[t][priv.learn_a_idx].first); + priv.learn_losses.cs().costs.push_back(priv.active_known[t][priv.learn_a_idx].first); cdbg << " --> adding " << priv.learn_a_idx << ":" << priv.active_known[t][priv.learn_a_idx].first.x << endl; priv.learn_a_idx++; advance_from_known_actions(priv); @@ -2265,9 +2240,9 @@ void train_single_example(search& sch, bool is_test_ex, bool is_holdout_ex, mult } if (priv.cb_learner) - priv.learn_losses.cb.costs.clear(); + priv.learn_losses.cb().costs.clear(); else - priv.learn_losses.cs.costs.clear(); + priv.learn_losses.cs().costs.clear(); for (size_t tid = 0; tid < priv.timesteps.size(); tid++) { @@ -2313,8 +2288,8 @@ void train_single_example(search& sch, bool is_test_ex, bool is_holdout_ex, mult // priv.learn_loss); } if (priv.active_csoaa_verify > 0.) - verify_active_csoaa( - priv.learn_losses.cs, priv.active_known[priv.learn_t], ec_seq[0]->example_counter, priv.active_csoaa_verify); + verify_active_csoaa(priv.learn_losses.cs(), priv.active_known[priv.learn_t], ec_seq[0]->example_counter, + priv.active_csoaa_verify); if (skipped_all_actions) { @@ -2335,7 +2310,7 @@ void train_single_example(search& sch, bool is_test_ex, bool is_holdout_ex, mult { for (size_t i = 0; i < priv.learn_allowed_actions.size(); i++) { - priv.learn_losses.cs.costs[i].class_index = priv.learn_allowed_actions[i]; + priv.learn_losses.cs().costs[i].class_index = priv.learn_allowed_actions[i]; } } // float min_loss = 0.; @@ -2343,22 +2318,23 @@ void train_single_example(search& sch, bool is_test_ex, bool is_holdout_ex, mult // for (size_t aid=0; aidsize(); aid++) // min_loss = std::min(min_loss, priv.memo_foreach_action[tid]->get(aid).cost); cdbg << "priv.learn_losses = ["; - for (auto& wc : priv.learn_losses.cs.costs) cdbg << " " << wc.class_index << ":" << wc.x; + for (auto& wc : priv.learn_losses.cs().costs) cdbg << " " << wc.class_index << ":" << wc.x; cdbg << " ]" << endl; cdbg << "gte" << endl; generate_training_example(priv, priv.learn_losses, 1., true); // , min_loss); // TODO: weight - if (!priv.examples_dont_change) - for (size_t n = 0; n < priv.learn_ec_copy.size(); n++) - { - if (sch.priv->is_ldf) - CS::cs_label.delete_label(&priv.learn_ec_copy[n].l.cs); - else - MC::mc_label.delete_label(&priv.learn_ec_copy[n].l.multi); - } + // Should not be needed anymore + // if (!priv.examples_dont_change) + // for (size_t n = 0; n < priv.learn_ec_copy.size(); n++) + // { + // if (sch.priv->is_ldf) + // CS::cs_label.delete_label(priv.learn_ec_copy[n].l); + // else + // MC::mc_label.delete_label(priv.learn_ec_copy[n].l); + // } if (priv.cb_learner) - priv.learn_losses.cb.costs.clear(); + priv.learn_losses.cb().costs.clear(); else - priv.learn_losses.cs.costs.clear(); + 
priv.learn_losses.cs().costs.clear();
   }
 
   if (priv.active_csoaa && (priv.save_every_k_runs > 1))
@@ -2491,7 +2467,7 @@ void end_examples(search& sch)
   }
 }
 
-bool mc_label_is_test(polylabel& lab) { return MC::mc_label.test_label(&lab.multi); }
+bool mc_label_is_test(polylabel& lab) { return MC::mc_label.test_label(lab); }
 
 void search_initialize(vw* all, search& sch)
 {
@@ -2529,10 +2505,7 @@ void search_initialize(vw* all, search& sch)
   sch.task_data = nullptr;
 
-  priv.active_uncertainty = v_init<std::pair<float, size_t>>();
-  priv.active_known = v_init<v_array<std::pair<CS::wclass, bool>>>();
-
-  CS::cs_label.default_label(&priv.empty_cs_label);
+  CS::default_label(priv.empty_cs_label);
 
   new (&priv.rawOutputString) std::string();
   priv.rawOutputStringStream = new std::stringstream(priv.rawOutputString);
@@ -2610,11 +2583,11 @@ v_array<CS::label> read_allowed_transitions(action A, const char* filename)
   }
   fclose(f);
 
-  v_array<CS::label> allowed = v_init<CS::label>();
+  v_array<CS::label> allowed;
 
   for (size_t from = 0; from < A; from++)
   {
-    v_array<CS::wclass> costs = v_init<CS::wclass>();
+    v_array<CS::wclass> costs;
 
     for (size_t to = 0; to < A; to++)
       if (bg[from * (A + 1) + to])
@@ -2799,20 +2772,19 @@ base_learner* setup(options_i& options, vw& all)
     THROW("error: --search_rollin must be 'learn', 'ref', 'mix' or 'mix_per_state'");
 
   // check if the base learner is contextual bandit, in which case we don't roll out all actions.
-  priv.allowed_actions_cache = &calloc_or_throw<polylabel>();
   if (options.was_supplied("cb"))
   {
     priv.cb_learner = true;
-    CB::cb_label.default_label(priv.allowed_actions_cache);
-    priv.learn_losses.cb.costs = v_init<CB::cb_class>();
-    priv.gte_label.cb.costs = v_init<CB::cb_class>();
+    CB::default_label(priv.allowed_actions_cache.init_as_cb());
+    priv.learn_losses.init_as_cb().costs = v_array<CB::cb_class>();
+    priv.gte_label.init_as_cb().costs = v_array<CB::cb_class>();
   }
   else
   {
     priv.cb_learner = false;
-    CS::cs_label.default_label(priv.allowed_actions_cache);
-    priv.learn_losses.cs.costs = v_init<CS::wclass>();
-    priv.gte_label.cs.costs = v_init<CS::wclass>();
+    CS::default_label(priv.allowed_actions_cache.init_as_cs());
+    priv.learn_losses.init_as_cs().costs = v_array<CS::wclass>();
+    priv.gte_label.init_as_cs().costs = v_array<CS::wclass>();
   }
 
   ensure_param(priv.beta, 0.0, 1.0, 0.5, "warning: search_beta must be in (0,1); resetting to 0.5");
@@ -2911,7 +2883,6 @@ base_learner* setup(options_i& options, vw& all)
 
   // default to OAA labels unless the task wants to override this (which they can do in initialize)
   all.p->lp = MC::mc_label;
-  all.label_type = label_type_t::mc;
   if (priv.task && priv.task->initialize)
     priv.task->initialize(*sch.get(), priv.A, options);
   if (priv.metatask && priv.metatask->initialize)
@@ -2942,6 +2913,28 @@ base_learner* setup(options_i& options, vw& all)
   l.set_end_examples(end_examples);
   l.set_finish(search_finish);
   l.set_end_pass(end_pass);
+
+  // In search, tasks can define which label should be used. There isn't a great
+  // way to do this right now; currently the only usage is for cost sensitive.
+  // So we check at this point whether the label parser is either multiclass or
+  // cost sensitive, and throw in any other case, as it is not supported yet.
+  // TODO: improve the handling of tasks specifying label types.
+  if (all.p->lp.parse_label == COST_SENSITIVE::cs_label.parse_label)
+  {
+    l.label_type = label_type_t::cs;
+    l.pred_type = prediction_type_t::multiclass;
+  }
+  else if (all.p->lp.parse_label == MC::mc_label.parse_label)
+  {
+    l.label_type = label_type_t::multi;
+    l.pred_type = prediction_type_t::multiclass;
+  }
+  else
+  {
+    THROW(
+        "Only multi and cost sensitive are supported in search right now. 
To support more, please add another check " + "for label types.") + } return make_base(l); } @@ -3021,7 +3014,7 @@ action search::predictLDF(example* ecs, size_t ec_cnt, ptag mytag, const action* // beyond the end of the array (usually resulting in a segfault at some point.) size_t action_index = a - COST_SENSITIVE::ec_is_example_header(ecs[0]) ? 0 : 1; - if ((mytag != 0) && ecs[action_index].l.cs.costs.size() > 0) + if ((mytag != 0) && ecs[action_index].l.cs().costs.size() > 0) { if (mytag < priv->ptag_to_action.size()) { @@ -3032,7 +3025,7 @@ action search::predictLDF(example* ecs, size_t ec_cnt, ptag mytag, const action* priv->ptag_to_action[mytag].repr = nullptr; } } - push_at(priv->ptag_to_action, action_repr(ecs[a].l.cs.costs[0].class_index, &(priv->last_action_repr)), mytag); + push_at(priv->ptag_to_action, action_repr(ecs[a].l.cs().costs[0].class_index, &(priv->last_action_repr)), mytag); } if (priv->auto_hamming_loss) loss(action_hamming_loss(a, oracle_actions, oracle_actions_cnt)); // TODO: action costs @@ -3085,7 +3078,7 @@ void search::set_label_parser(label_parser& lp, bool (*is_test)(polylabel&)) if (this->priv->all->vw_is_main && (this->priv->state != INITIALIZE)) std::cerr << "warning: task should not set label parser except in initialize function!" << endl; this->priv->all->p->lp = lp; - this->priv->all->p->lp.test_label = (bool (*)(void*))is_test; + this->priv->all->p->lp.test_label = is_test; this->priv->label_is_test = is_test; } @@ -3121,23 +3114,8 @@ void search::set_force_oracle(bool force) { this->priv->force_oracle = force; } // predictor implementation predictor::predictor(search& sch, ptag my_tag) - : is_ldf(false) - , my_tag(my_tag) - , ec(nullptr) - , ec_cnt(0) - , ec_alloced(false) - , weight(1.) - , oracle_is_pointer(false) - , allowed_is_pointer(false) - , allowed_cost_is_pointer(false) - , learner_id(0) - , sch(sch) -{ - oracle_actions = v_init(); - condition_on_tags = v_init(); - condition_on_names = v_init(); - allowed_actions = v_init(); - allowed_actions_cost = v_init(); + : is_ldf(false), my_tag(my_tag), ec(nullptr), ec_cnt(0), ec_alloced(false), weight(1.), learner_id(0), sch(sch) +{ } void predictor::free_ec() @@ -3145,30 +3123,14 @@ void predictor::free_ec() if (ec_alloced) { if (is_ldf) - for (size_t i = 0; i < ec_cnt; i++) - { - VW::dealloc_example(CS::cs_label.delete_label, ec[i]); - } + for (size_t i = 0; i < ec_cnt; i++) ec[i].~example(); else - { - VW::dealloc_example(nullptr, *ec); - } + ec->~example(); free(ec); } } -predictor::~predictor() -{ - if (!oracle_is_pointer) - oracle_actions.delete_v(); - if (!allowed_is_pointer) - allowed_actions.delete_v(); - if (!allowed_cost_is_pointer) - allowed_actions_cost.delete_v(); - free_ec(); - condition_on_tags.delete_v(); - condition_on_names.delete_v(); -} +predictor::~predictor() { free_ec(); } predictor& predictor::reset() { this->erase_oracles(); @@ -3224,8 +3186,8 @@ void predictor::set_input_at(size_t posn, example& ex) if (posn >= ec_cnt) THROW("call to set_input_at with too large a position: posn (" << posn << ") >= ec_cnt(" << ec_cnt << ")"); - VW::copy_example_data( - false, ec + posn, &ex, CS::cs_label.label_size, CS::cs_label.copy_label); // TODO: the false is "audit" + // Copy given example into ec. 
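  // (example's copy-assignment now performs the deep copy, label included,
  // replacing the old VW::copy_example_data call and its explicit
  // label_size/copy_label callbacks.)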
+ ec[posn] = ex; } template @@ -3240,96 +3202,52 @@ void predictor::make_new_pointer(v_array& A, size_t new_size) } template -predictor& predictor::add_to(v_array& A, bool& A_is_ptr, T a, bool clear_first) +predictor& predictor::add_to(v_array& destination, T action, bool clear_first) { - if (A_is_ptr) // we need to make our own memory - { - if (clear_first) - A.end() = A.begin(); - size_t new_size = clear_first ? 1 : (A.size() + 1); - make_new_pointer(A, new_size); - A_is_ptr = false; - A[new_size - 1] = a; - } - else // we've already allocated our own memory + if (clear_first) { - if (clear_first) - A.clear(); - A.push_back(a); + destination.clear(); } + destination.push_back(action); + return *this; } template -predictor& predictor::add_to(v_array& A, bool& A_is_ptr, T* a, size_t count, bool clear_first) +predictor& predictor::add_to(v_array& destination, T* source, size_t count, bool clear_first) { - size_t old_size = A.size(); - if (old_size > 0) + if (clear_first) { - if (A_is_ptr) // we need to make our own memory - { - if (clear_first) - { - A.end() = A.begin(); - old_size = 0; - } - size_t new_size = old_size + count; - make_new_pointer(A, new_size); - A_is_ptr = false; - if (a != nullptr) - memcpy(A.begin() + old_size, a, count * sizeof(T)); - } - else // we already have our own memory - { - if (clear_first) - A.clear(); - if (a != nullptr) - push_many(A, a, count); - } + destination.clear(); } - else // old_size == 0, clear_first is irrelevant + // TODO uncomment this + // destination.reserve(destination.size() + count); + for (size_t i = 0; i < count; i++) { - if (!A_is_ptr) - A.delete_v(); // avoid memory leak - - A.begin() = a; - if (a != nullptr) // a is not nullptr - A.end() = a + count; - else - A.end() = a; - A.end_array = A.end(); - A_is_ptr = true; + destination.push_back(source[i]); } + return *this; } predictor& predictor::erase_oracles() { - if (oracle_is_pointer) - oracle_actions.end() = oracle_actions.begin(); - else - oracle_actions.clear(); + oracle_actions.clear(); return *this; } -predictor& predictor::add_oracle(action a) { return add_to(oracle_actions, oracle_is_pointer, a, false); } +predictor& predictor::add_oracle(action a) { return add_to(oracle_actions, a, false); } predictor& predictor::add_oracle(action* a, size_t action_count) { - return add_to(oracle_actions, oracle_is_pointer, a, action_count, false); -} -predictor& predictor::add_oracle(v_array& a) -{ - return add_to(oracle_actions, oracle_is_pointer, a.begin(), a.size(), false); + return add_to(oracle_actions, a, action_count, false); } +predictor& predictor::add_oracle(v_array& a) { return add_to(oracle_actions, a.begin(), a.size(), false); } -predictor& predictor::set_oracle(action a) { return add_to(oracle_actions, oracle_is_pointer, a, true); } +predictor& predictor::set_oracle(action a) { return add_to(oracle_actions, a, true); } predictor& predictor::set_oracle(action* a, size_t action_count) { - return add_to(oracle_actions, oracle_is_pointer, a, action_count, true); -} -predictor& predictor::set_oracle(v_array& a) -{ - return add_to(oracle_actions, oracle_is_pointer, a.begin(), a.size(), true); + return add_to(oracle_actions, a, action_count, true); } +predictor& predictor::set_oracle(v_array& a) { return add_to(oracle_actions, a.begin(), a.size(), true); } predictor& predictor::set_weight(float w) { @@ -3339,53 +3257,50 @@ predictor& predictor::set_weight(float w) predictor& predictor::erase_alloweds() { - if (allowed_is_pointer) - allowed_actions.end() = allowed_actions.begin(); - else - 
allowed_actions.clear(); - if (allowed_cost_is_pointer) - allowed_actions_cost.end() = allowed_actions_cost.begin(); - else - allowed_actions_cost.clear(); + allowed_actions.clear(); + allowed_actions_cost.clear(); return *this; } -predictor& predictor::add_allowed(action a) { return add_to(allowed_actions, allowed_is_pointer, a, false); } +predictor& predictor::add_allowed(action a) { return add_to(allowed_actions, a, false); } predictor& predictor::add_allowed(action* a, size_t action_count) { - return add_to(allowed_actions, allowed_is_pointer, a, action_count, false); -} -predictor& predictor::add_allowed(v_array& a) -{ - return add_to(allowed_actions, allowed_is_pointer, a.begin(), a.size(), false); + return add_to(allowed_actions, a, action_count, false); } +predictor& predictor::add_allowed(v_array& a) { return add_to(allowed_actions, a.begin(), a.size(), false); } -predictor& predictor::set_allowed(action a) { return add_to(allowed_actions, allowed_is_pointer, a, true); } +predictor& predictor::set_allowed(action a) { return add_to(allowed_actions, a, true); } predictor& predictor::set_allowed(action* a, size_t action_count) { - return add_to(allowed_actions, allowed_is_pointer, a, action_count, true); -} -predictor& predictor::set_allowed(v_array& a) -{ - return add_to(allowed_actions, allowed_is_pointer, a.begin(), a.size(), true); + return add_to(allowed_actions, a, action_count, true); } +predictor& predictor::set_allowed(v_array& a) { return add_to(allowed_actions, a.begin(), a.size(), true); } predictor& predictor::add_allowed(action a, float cost) { - add_to(allowed_actions_cost, allowed_cost_is_pointer, cost, false); - return add_to(allowed_actions, allowed_is_pointer, a, false); + add_to(allowed_actions_cost, cost, false); + return add_to(allowed_actions, a, false); } predictor& predictor::add_allowed(action* a, float* costs, size_t action_count) { - add_to(allowed_actions_cost, allowed_cost_is_pointer, costs, action_count, false); - return add_to(allowed_actions, allowed_is_pointer, a, action_count, false); + // In sequence task this function is used with a being nullptr, but costs is valid. + // So we need to check if we can do the adds. 
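  // (set_allowed(action* a, float* costs, size_t action_count) below applies
  // the same nullptr guard for that usage.)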
+ if (costs != nullptr) + { + add_to(allowed_actions_cost, costs, action_count, false); + } + if (a != nullptr) + { + add_to(allowed_actions, a, action_count, false); + } + return *this; } predictor& predictor::add_allowed(v_array>& a) { for (size_t i = 0; i < a.size(); i++) { - add_to(allowed_actions, allowed_is_pointer, a[i].first, false); - add_to(allowed_actions_cost, allowed_cost_is_pointer, a[i].second, false); + add_to(allowed_actions, a[i].first, false); + add_to(allowed_actions_cost, a[i].second, false); } return *this; } @@ -3393,22 +3308,31 @@ predictor& predictor::add_allowed(std::vector>& a) { for (size_t i = 0; i < a.size(); i++) { - add_to(allowed_actions, allowed_is_pointer, a[i].first, false); - add_to(allowed_actions_cost, allowed_cost_is_pointer, a[i].second, false); + add_to(allowed_actions, a[i].first, false); + add_to(allowed_actions_cost, a[i].second, false); } return *this; } predictor& predictor::set_allowed(action a, float cost) { - add_to(allowed_actions_cost, allowed_cost_is_pointer, cost, true); - return add_to(allowed_actions, allowed_is_pointer, a, true); + add_to(allowed_actions_cost, cost, true); + return add_to(allowed_actions, a, true); } predictor& predictor::set_allowed(action* a, float* costs, size_t action_count) { - add_to(allowed_actions_cost, allowed_cost_is_pointer, costs, action_count, true); - return add_to(allowed_actions, allowed_is_pointer, a, action_count, true); + // In sequence task this function is used with a being nullptr, but costs is valid. + // So we need to check if we can do the adds. + if (costs != nullptr) + { + add_to(allowed_actions_cost, costs, action_count, true); + } + if (a != nullptr) + { + add_to(allowed_actions, a, action_count, true); + } + return *this; } predictor& predictor::set_allowed(v_array>& a) { diff --git a/vowpalwabbit/search.h b/vowpalwabbit/search.h index 2ee4f4980a5..f3981930589 100644 --- a/vowpalwabbit/search.h +++ b/vowpalwabbit/search.h @@ -335,22 +335,19 @@ class predictor bool ec_alloced; float weight; v_array oracle_actions; - bool oracle_is_pointer; // if we're pointing to your memory TRUE; if it's our own memory FALSE v_array condition_on_tags; v_array condition_on_names; v_array allowed_actions; - bool allowed_is_pointer; // if we're pointing to your memory TRUE; if it's our own memory FALSE v_array allowed_actions_cost; - bool allowed_cost_is_pointer; // if we're pointing to your memory TRUE; if it's our own memory FALSE size_t learner_id; search& sch; template void make_new_pointer(v_array& A, size_t new_size); template - predictor& add_to(v_array& A, bool& A_is_ptr, T a, bool clear_first); + predictor& add_to(v_array& A, T a, bool clear_first); template - predictor& add_to(v_array& A, bool& A_is_ptr, T* a, size_t count, bool clear_first); + predictor& add_to(v_array& A, T* a, size_t count, bool clear_first); void free_ec(); // prevent the user from doing something stupid :) ... 
ugh needed to turn this off for python :( diff --git a/vowpalwabbit/search_dep_parser.cc b/vowpalwabbit/search_dep_parser.cc index a0a89913879..279b17d1311 100644 --- a/vowpalwabbit/search_dep_parser.cc +++ b/vowpalwabbit/search_dep_parser.cc @@ -28,9 +28,9 @@ struct task_data v_array valid_actions, action_loss, gold_heads, gold_tags, stack, heads, tags, temp, valid_action_temp; v_array gold_actions, gold_action_temp; v_array> gold_action_losses; - v_array children[6]; // [0]:num_left_arcs, [1]:num_right_arcs; [2]: leftmost_arc, [3]: second_leftmost_arc, - // [4]:rightmost_arc, [5]: second_rightmost_arc - example *ec_buf[13]; + std::array, 6> children; // [0]:num_left_arcs, [1]:num_right_arcs; [2]: leftmost_arc, [3]: second_leftmost_arc, + // [4]:rightmost_arc, [5]: second_rightmost_arc + std::array ec_buf; bool old_style_labels; bool cost_to_go, one_learner; uint32_t transition_system; @@ -51,7 +51,7 @@ void initialize(Search::search &sch, size_t & /*num_actions*/, options_i &option vw &all = sch.get_vw_pointer_unsafe(); task_data *data = new task_data(); data->action_loss.resize(5); - data->ex = NULL; + data->ex = nullptr; sch.set_task_data(data); option_group_definition new_options("Dependency Parser Options"); @@ -74,11 +74,12 @@ void initialize(Search::search &sch, size_t & /*num_actions*/, options_i &option make_option("old_style_labels", data->old_style_labels).keep().help("Use old hack of label information")); options.add_and_parse(new_options); - data->ex = VW::alloc_examples(sizeof(polylabel), 1); + data->ex = VW::alloc_examples(1); data->ex->indices.push_back(val_namespace); for (size_t i = 1; i < 14; i++) data->ex->indices.push_back((unsigned char)i + 'A'); data->ex->indices.push_back(constant_namespace); data->ex->interactions = &sch.get_vw_pointer_unsafe().interactions; + data->ex->pred.init_as_multiclass(); if (data->one_learner) sch.set_num_learners(1); @@ -101,27 +102,15 @@ void initialize(Search::search &sch, size_t & /*num_actions*/, options_i &option else sch.set_options(AUTO_CONDITION_FEATURES | NO_CACHING); - sch.set_label_parser(COST_SENSITIVE::cs_label, [](polylabel &l) -> bool { return l.cs.costs.size() == 0; }); + sch.set_label_parser(COST_SENSITIVE::cs_label, [](polylabel &l) -> bool { return l.cs().costs.size() == 0; }); } void finish(Search::search &sch) { - task_data *data = sch.get_task_data(); - data->valid_actions.delete_v(); - data->valid_action_temp.delete_v(); - data->gold_heads.delete_v(); - data->gold_tags.delete_v(); - data->stack.delete_v(); - data->heads.delete_v(); - data->tags.delete_v(); - data->temp.delete_v(); - data->action_loss.delete_v(); - data->gold_actions.delete_v(); - data->gold_action_losses.delete_v(); - data->gold_action_temp.delete_v(); - VW::dealloc_example(COST_SENSITIVE::cs_label.delete_label, *data->ex); + task_data* data = sch.get_task_data(); + + data->ex->~example(); free(data->ex); - for (size_t i = 0; i < 6; i++) data->children[i].delete_v(); delete data; } @@ -152,9 +141,12 @@ void inline reset_ex(example *ex) size_t transition_hybrid(Search::search &sch, uint64_t a_id, uint32_t idx, uint32_t t_id, uint32_t /* n */) { task_data *data = sch.get_task_data(); - v_array &heads = data->heads, &stack = data->stack, &gold_heads = data->gold_heads, - &gold_tags = data->gold_tags, &tags = data->tags; - v_array *children = data->children; + v_array& heads = data->heads; + v_array& stack = data->stack; + v_array& gold_heads = data->gold_heads; + v_array& gold_tags = data->gold_tags; + v_array& tags = data->tags; + auto& children = 
data->children; if (a_id == SHIFT) { stack.push_back(idx); @@ -195,9 +187,12 @@ size_t transition_hybrid(Search::search &sch, uint64_t a_id, uint32_t idx, uint3 size_t transition_eager(Search::search &sch, uint64_t a_id, uint32_t idx, uint32_t t_id, uint32_t n) { task_data *data = sch.get_task_data(); - v_array &heads = data->heads, &stack = data->stack, &gold_heads = data->gold_heads, - &gold_tags = data->gold_tags, &tags = data->tags; - v_array *children = data->children; + v_array& heads = data->heads; + v_array& stack = data->stack; + v_array& gold_heads = data->gold_heads; + v_array& gold_tags = data->gold_tags; + v_array& tags = data->tags; + auto& children = data->children; if (a_id == SHIFT) { stack.push_back(idx); @@ -247,8 +242,11 @@ void extract_features(Search::search &sch, uint32_t idx, multi_ex &ec) uint64_t mask = sch.get_mask(); uint64_t multiplier = (uint64_t)all.wpp << all.weights.stride_shift(); - v_array &stack = data->stack, &tags = data->tags, *children = data->children, &temp = data->temp; - example **ec_buf = data->ec_buf; + v_array& stack = data->stack; + v_array& tags = data->tags; + auto& children = data->children; + v_array& temp = data->temp; + example** ec_buf = data->ec_buf.data(); example &ex = *(data->ex); size_t n = ec.size(); @@ -379,8 +377,10 @@ bool is_valid(uint64_t action, const v_array& valid_actions) void get_eager_action_cost(Search::search &sch, uint32_t idx, uint64_t n) { task_data *data = sch.get_task_data(); - v_array &action_loss = data->action_loss, &stack = data->stack, &gold_heads = data->gold_heads, - heads = data->heads; + v_array& action_loss = data->action_loss; + v_array& stack = data->stack; + v_array& gold_heads = data->gold_heads; + v_array& heads = data->heads; size_t size = stack.size(); size_t last = (size == 0) ? 0 : stack.last(); for (size_t i = 1; i <= 4; i++) action_loss[i] = 0; @@ -455,7 +455,8 @@ void get_cost_to_go_losses(Search::search &sch, v_array task_data *data = sch.get_task_data(); bool &one_learner = data->one_learner; uint32_t &sys = data->transition_system; - v_array &action_loss = data->action_loss, &valid_actions = data->valid_actions; + v_array& action_loss = data->action_loss; + v_array& valid_actions = data->valid_actions; uint32_t &num_label = data->num_label; gold_action_losses.clear(); @@ -487,8 +488,10 @@ void get_cost_to_go_losses(Search::search &sch, v_array void get_gold_actions(Search::search &sch, uint32_t idx, uint64_t /* n */, v_array &gold_actions) { task_data *data = sch.get_task_data(); - v_array &action_loss = data->action_loss, &stack = data->stack, &gold_heads = data->gold_heads, - &valid_actions = data->valid_actions; + v_array& action_loss = data->action_loss; + v_array& stack = data->stack; + v_array& gold_heads = data->gold_heads; + v_array& valid_actions = data->valid_actions; gold_actions.clear(); size_t size = stack.size(); size_t last = (size == 0) ? 
0 : stack.last(); @@ -554,8 +557,10 @@ void convert_to_onelearner_actions(Search::search &sch, v_array &actions void setup(Search::search &sch, multi_ex &ec) { task_data *data = sch.get_task_data(); - v_array &gold_heads = data->gold_heads, &heads = data->heads, &gold_tags = data->gold_tags, - &tags = data->tags; + v_array& gold_heads = data->gold_heads; + v_array& heads = data->heads; + v_array& gold_tags = data->gold_tags; + v_array& tags = data->tags; size_t n = ec.size(); heads.resize(n + 1); tags.resize(n + 1); @@ -565,7 +570,7 @@ void setup(Search::search &sch, multi_ex &ec) gold_tags.push_back(0); for (size_t i = 0; i < n; i++) { - const auto& costs = ec[i]->l.cs.costs; + const auto& costs = ec[i]->l.cs().costs; uint32_t head, tag; if (data->old_style_labels) { diff --git a/vowpalwabbit/search_entityrelationtask.cc b/vowpalwabbit/search_entityrelationtask.cc index 0ce16573a36..50f2607e91c 100644 --- a/vowpalwabbit/search_entityrelationtask.cc +++ b/vowpalwabbit/search_entityrelationtask.cc @@ -75,11 +75,11 @@ void initialize(Search::search& sch, size_t& /*num_actions*/, options_i& options } else { - example* ldf_examples = VW::alloc_examples(sizeof(CS::label), 10); + example* ldf_examples = VW::alloc_examples(10); CS::wclass default_wclass = {0., 0, 0., 0.}; for (size_t a = 0; a < 10; a++) { - ldf_examples[a].l.cs.costs.push_back(default_wclass); + ldf_examples[a].l.cs().costs.push_back(default_wclass); ldf_examples[a].interactions = &sch.get_vw_pointer_unsafe().interactions; } my_task_data->ldf_entity = ldf_examples; @@ -95,11 +95,10 @@ void initialize(Search::search& sch, size_t& /*num_actions*/, options_i& options void finish(Search::search& sch) { task_data* my_task_data = sch.get_task_data(); - my_task_data->y_allowed_entity.delete_v(); - my_task_data->y_allowed_relation.delete_v(); if (my_task_data->search_order == 3) { - for (size_t a = 0; a < 10; a++) VW::dealloc_example(CS::cs_label.delete_label, my_task_data->ldf_entity[a]); + for (size_t a = 0; a < 10; a++) + my_task_data->ldf_entity[a].~example(); free(my_task_data->ldf_entity); } delete my_task_data; @@ -145,8 +144,8 @@ size_t predict_entity( size_t prediction; if (my_task_data->allow_skip) { - v_array star_labels = v_init(); - star_labels.push_back(ex->l.multi.label); + v_array star_labels; + star_labels.push_back(ex->l.multi().label); star_labels.push_back(LABEL_SKIP); my_task_data->y_allowed_entity.push_back(LABEL_SKIP); prediction = Search::predictor(sch, my_tag) @@ -165,7 +164,7 @@ size_t predict_entity( { VW::copy_example_data(false, &my_task_data->ldf_entity[a], ex); update_example_indicies(true, &my_task_data->ldf_entity[a], 28904713, 4832917 * (uint64_t)(a + 1)); - CS::label& lab = my_task_data->ldf_entity[a].l.cs; + CS::label& lab = my_task_data->ldf_entity[a].l.cs(); lab.costs[0].x = 0.f; lab.costs[0].class_index = a; lab.costs[0].partial_prediction = 0.f; @@ -173,7 +172,7 @@ size_t predict_entity( } prediction = Search::predictor(sch, my_tag) .set_input(my_task_data->ldf_entity, 4) - .set_oracle(ex->l.multi.label - 1) + .set_oracle(ex->l.multi().label - 1) .set_learner_id(1) .predict() + 1; @@ -182,7 +181,7 @@ size_t predict_entity( { prediction = Search::predictor(sch, my_tag) .set_input(*ex) - .set_oracle(ex->l.multi.label) + .set_oracle(ex->l.multi().label) .set_allowed(my_task_data->y_allowed_entity) .set_learner_id(0) .predict(); @@ -195,7 +194,7 @@ size_t predict_entity( { loss = my_task_data->skip_cost; } - else if (prediction != ex->l.multi.label) + else if (prediction != ex->l.multi().label) loss = 
my_task_data->entity_cost; sch.loss(loss); return prediction; @@ -207,7 +206,7 @@ size_t predict_relation(Search::search& sch, example* ex, v_array& predi task_data* my_task_data = sch.get_task_data(); size_t hist[2]; decode_tag(ex->tag, type, id1, id2); - v_array constrained_relation_labels = v_init(); + v_array constrained_relation_labels; if (my_task_data->constraints && predictions[id1] != 0 && predictions[id2] != 0) { hist[0] = predictions[id1]; @@ -228,8 +227,8 @@ size_t predict_relation(Search::search& sch, example* ex, v_array& predi size_t prediction; if (my_task_data->allow_skip) { - v_array star_labels = v_init(); - star_labels.push_back(ex->l.multi.label); + v_array star_labels; + star_labels.push_back(ex->l.multi().label); star_labels.push_back(LABEL_SKIP); constrained_relation_labels.push_back(LABEL_SKIP); prediction = Search::predictor(sch, my_tag) @@ -252,12 +251,12 @@ size_t predict_relation(Search::search& sch, example* ex, v_array& predi VW::copy_example_data(false, &my_task_data->ldf_relation[a], ex); update_example_indicies( true, &my_task_data->ldf_relation[a], 28904713, 4832917 * (uint64_t)(constrained_relation_labels[a])); - CS::label& lab = my_task_data->ldf_relation[a].l.cs; + CS::label& lab = my_task_data->ldf_relation[a].l.cs(); lab.costs[0].x = 0.f; lab.costs[0].class_index = constrained_relation_labels[a]; lab.costs[0].partial_prediction = 0.f; lab.costs[0].wap_value = 0.f; - if (constrained_relation_labels[a] == ex->l.multi.label) + if (constrained_relation_labels[a] == ex->l.multi().label) { correct_label = (int)a; } @@ -273,7 +272,7 @@ size_t predict_relation(Search::search& sch, example* ex, v_array& predi { prediction = Search::predictor(sch, my_tag) .set_input(*ex) - .set_oracle(ex->l.multi.label) + .set_oracle(ex->l.multi().label) .set_allowed(constrained_relation_labels) .set_learner_id(1) .predict(); @@ -285,9 +284,9 @@ size_t predict_relation(Search::search& sch, example* ex, v_array& predi { loss = my_task_data->skip_cost; } - else if (prediction != ex->l.multi.label) + else if (prediction != ex->l.multi().label) { - if (ex->l.multi.label == R_NONE) + if (ex->l.multi().label == R_NONE) { loss = my_task_data->relation_none_cost; } @@ -297,7 +296,6 @@ size_t predict_relation(Search::search& sch, example* ex, v_array& predi } } sch.loss(loss); - constrained_relation_labels.delete_v(); return prediction; } @@ -407,7 +405,7 @@ void run(Search::search& sch, multi_ex& ec) { task_data* my_task_data = sch.get_task_data(); - v_array predictions = v_init(); + v_array predictions; for (size_t i = 0; i < ec.size(); i++) { predictions.push_back(0); @@ -436,7 +434,6 @@ void run(Search::search& sch, multi_ex& ec) if (sch.output().good()) sch.output() << predictions[i] << ' '; } - predictions.delete_v(); } // this is totally bogus for the example -- you'd never actually do this! 
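// (Illustration of the hack below: update_example_indicies presumably remaps
// every feature hash in place, roughly idx = idx * mult_amount + plus_amount,
// so that each LDF candidate built from the same example lands in its own
// feature space.)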
void update_example_indicies(bool /* audit */, example* ec, uint64_t mult_amount, uint64_t plus_amount) diff --git a/vowpalwabbit/search_graph.cc b/vowpalwabbit/search_graph.cc index dc7d7a9f546..dffa91e3634 100644 --- a/vowpalwabbit/search_graph.cc +++ b/vowpalwabbit/search_graph.cc @@ -89,7 +89,7 @@ struct task_data float true_counts_total; }; -inline bool example_is_test(polylabel& l) { return l.cs.costs.size() == 0; } +inline bool example_is_test(polylabel& l) { return l.cs().costs.size() == 0; } void initialize(Search::search& sch, size_t& num_actions, options_i& options) { @@ -140,7 +140,7 @@ void finish(Search::search& sch) delete D; } -inline bool example_is_edge(example* e) { return e->l.cs.costs.size() > 1; } +inline bool example_is_edge(example* e) { return e->l.cs().costs.size() > 1; } void run_bfs(task_data& D, multi_ex& ec) { @@ -158,9 +158,9 @@ void run_bfs(task_data& D, multi_ex& ec) { uint32_t n = D.bfs[i]; for (size_t id : D.adj[n]) - for (size_t j = 0; j < ec[id]->l.cs.costs.size(); j++) + for (size_t j = 0; j < ec[id]->l.cs().costs.size(); j++) { - uint32_t m = ec[id]->l.cs.costs[j].class_index; + uint32_t m = ec[id]->l.cs().costs[j].class_index; if ((m > 0) && (!touched[m - 1])) { D.bfs.push_back(m - 1); @@ -200,9 +200,9 @@ void setup(Search::search& sch, multi_ex& ec) THROW("error: got a node after getting edges!"); D.N++; - if (ec[i]->l.cs.costs.size() > 0) + if (ec[i]->l.cs().costs.size() > 0) { - D.true_counts[ec[i]->l.cs.costs[0].class_index] += 1.; + D.true_counts[ec[i]->l.cs().costs[0].class_index] += 1.; D.true_counts_total += 1.; } } @@ -214,15 +214,15 @@ void setup(Search::search& sch, multi_ex& ec) for (size_t i = D.N; i < ec.size(); i++) { - for (size_t n = 0; n < ec[i]->l.cs.costs.size(); n++) + for (size_t n = 0; n < ec[i]->l.cs().costs.size(); n++) { - if (ec[i]->l.cs.costs[n].class_index > D.N) - THROW("error: edge source points to too large of a node id: " << (ec[i]->l.cs.costs[n].class_index) << " > " + if (ec[i]->l.cs().costs[n].class_index > D.N) + THROW("error: edge source points to too large of a node id: " << (ec[i]->l.cs().costs[n].class_index) << " > " << D.N); } - for (size_t n = 0; n < ec[i]->l.cs.costs.size(); n++) + for (size_t n = 0; n < ec[i]->l.cs().costs.size(); n++) { - size_t nn = ec[i]->l.cs.costs[n].class_index; + size_t nn = ec[i]->l.cs().costs[n].class_index; if ((nn > 0) && (((D.adj[nn - 1].size() == 0) || (D.adj[nn - 1][D.adj[nn - 1].size() - 1] != i)))) // don't allow dups D.adj[nn - 1].push_back(i); @@ -280,9 +280,9 @@ void add_edge_features(Search::search& sch, task_data& D, size_t n, multi_ex& ec { bool n_in_sink = true; if (D.directed) - for (size_t j = 0; j < ec[i]->l.cs.costs.size() - 1; j++) + for (size_t j = 0; j < ec[i]->l.cs().costs.size() - 1; j++) { - size_t m = ec[i]->l.cs.costs[j].class_index; + size_t m = ec[i]->l.cs().costs[j].class_index; if (m == 0) break; if (m - 1 == n) @@ -293,15 +293,15 @@ void add_edge_features(Search::search& sch, task_data& D, size_t n, multi_ex& ec } bool m_in_sink = false; - for (size_t j = 0; j < ec[i]->l.cs.costs.size(); j++) + for (size_t j = 0; j < ec[i]->l.cs().costs.size(); j++) { - size_t m = ec[i]->l.cs.costs[j].class_index; + size_t m = ec[i]->l.cs().costs[j].class_index; if (m == 0) { m_in_sink = true; continue; } - if (j == ec[i]->l.cs.costs.size() - 1) + if (j == ec[i]->l.cs().costs.size() - 1) m_in_sink = true; m--; if (m == n) @@ -411,7 +411,7 @@ void run(Search::search& sch, multi_ex& ec) for (int n_id = start; n_id != end; n_id += step) { uint32_t n = D.bfs[n_id]; - 
uint32_t k = (ec[n]->l.cs.costs.size() > 0) ? ec[n]->l.cs.costs[0].class_index : 0; + uint32_t k = (ec[n]->l.cs().costs.size() > 0) ? ec[n]->l.cs().costs[0].class_index : 0; bool add_features = /* D.use_structure && */ sch.predictNeedsExample(); // add_features = false; @@ -437,9 +437,9 @@ void run(Search::search& sch, multi_ex& ec) // add all the conditioning for (size_t i = 0; i < D.adj[n].size(); i++) { - for (size_t j = 0; j < ec[i]->l.cs.costs.size(); j++) + for (size_t j = 0; j < ec[i]->l.cs().costs.size(); j++) { - uint32_t m = ec[i]->l.cs.costs[j].class_index; + uint32_t m = ec[i]->l.cs().costs[j].class_index; if (m == 0) continue; m--; @@ -451,15 +451,15 @@ void run(Search::search& sch, multi_ex& ec) // make the prediction D.pred[n] = P.predict(); - if (ec[n]->l.cs.costs.size() > 0) // for test examples - sch.loss((ec[n]->l.cs.costs[0].class_index == D.pred[n]) ? 0.f : (last_loop ? 0.5f : loss_val)); + if (ec[n]->l.cs().costs.size() > 0) // for test examples + sch.loss((ec[n]->l.cs().costs[0].class_index == D.pred[n]) ? 0.f : (last_loop ? 0.5f : loss_val)); if (add_features) del_edge_features(D, n, ec); } } - for (uint32_t n = 0; n < D.N; n++) D.confusion_matrix[IDX(ec[n]->l.cs.costs[0].class_index, D.pred[n])]++; + for (uint32_t n = 0; n < D.N; n++) D.confusion_matrix[IDX(ec[n]->l.cs().costs[0].class_index, D.pred[n])]++; sch.loss(1.f - macro_f(D)); if (sch.output().good()) diff --git a/vowpalwabbit/search_meta.cc b/vowpalwabbit/search_meta.cc index a34c1c4fe0a..4284612dda0 100644 --- a/vowpalwabbit/search_meta.cc +++ b/vowpalwabbit/search_meta.cc @@ -69,17 +69,11 @@ struct task_data std::stringstream* kbest_out; task_data(size_t mb, size_t kb) : max_branches(mb), kbest(kb) { - branches = v_init(); - final = v_init >(); - trajectory = v_init(); output_string = nullptr; kbest_out = nullptr; } ~task_data() { - branches.delete_v(); - final.delete_v(); - trajectory.delete_v(); delete output_string; delete kbest_out; } @@ -125,7 +119,7 @@ void run(Search::search& sch, multi_ex& ec) return; // ignore the taken action task_data& d = *sch.get_metatask_data(); float delta = a_cost - min_cost; - path branch = v_init(); + path branch; push_many(branch, d.trajectory.begin(), d.trajectory.size()); branch.push_back(std::make_pair(a, a_cost)); d.branches.push_back(std::make_pair(delta, branch)); @@ -147,7 +141,7 @@ void run(Search::search& sch, multi_ex& ec) { // construct the final trajectory - path original_final = v_init(); + path original_final; copy_array(original_final, d.trajectory); d.final.push_back(std::make_pair(std::make_pair(d.total_cost, original_final), d.output_string)); } @@ -189,7 +183,7 @@ void run(Search::search& sch, multi_ex& ec) { // construct the final trajectory - path this_final = v_init(); + path this_final; copy_array(this_final, d.trajectory); d.final.push_back(std::make_pair(std::make_pair(d.total_cost, this_final), d.output_string)); } @@ -237,11 +231,9 @@ void run(Search::search& sch, multi_ex& ec) .Run(); // clean up memory - for (size_t i = 0; i < d.branches.size(); i++) d.branches[i].second.delete_v(); d.branches.clear(); for (size_t i = 0; i < d.final.size(); i++) { - d.final[i].first.second.delete_v(); delete d.final[i].second; } d.final.clear(); diff --git a/vowpalwabbit/search_multiclasstask.cc b/vowpalwabbit/search_multiclasstask.cc index ad44bd71164..8c151d28654 100644 --- a/vowpalwabbit/search_multiclasstask.cc +++ b/vowpalwabbit/search_multiclasstask.cc @@ -32,14 +32,13 @@ void initialize(Search::search& sch, size_t& num_actions, VW::config::options_i& 
void finish(Search::search& sch) { task_data* my_task_data = sch.get_task_data(); - my_task_data->y_allowed.delete_v(); delete my_task_data; } void run(Search::search& sch, multi_ex& ec) { task_data* my_task_data = sch.get_task_data(); - size_t gold_label = ec[0]->l.multi.label; + size_t gold_label = ec[0]->l.multi().label; size_t label = 0; size_t learner_id = 0; diff --git a/vowpalwabbit/search_sequencetask.cc b/vowpalwabbit/search_sequencetask.cc index 2d5789da2fc..afebd196ff7 100644 --- a/vowpalwabbit/search_sequencetask.cc +++ b/vowpalwabbit/search_sequencetask.cc @@ -42,7 +42,7 @@ void run(Search::search& sch, multi_ex& ec) Search::predictor P(sch, (ptag)0); for (size_t i = 0; i < ec.size(); i++) { - action oracle = ec[i]->l.multi.label; + action oracle = ec[i]->l.multi().label; size_t prediction = P.set_tag((ptag)i + 1) .set_input(*ec[i]) .set_oracle(oracle) @@ -96,9 +96,9 @@ void convert_bio_to_bilou(multi_ex& ec) { for (size_t n = 0; n < ec.size(); n++) { - MULTICLASS::label_t& ylab = ec[n]->l.multi; + MULTICLASS::label_t& ylab = ec[n]->l.multi(); action y = ylab.label; - action nexty = (n == ec.size() - 1) ? 0 : ec[n + 1]->l.multi.label; + action nexty = (n == ec.size() - 1) ? 0 : ec[n + 1]->l.multi().label; if (y == 1) // do nothing ; else if (y % 2 == 0) // this is a begin-X @@ -179,8 +179,6 @@ void initialize(Search::search& sch, size_t& num_actions, options_i& options) void finish(Search::search& sch) { task_data* D = sch.get_task_data(); - D->allowed_actions.delete_v(); - D->only_two_allowed.delete_v(); delete D; } @@ -198,7 +196,7 @@ void takedown(Search::search& sch, multi_ex& ec) if (D.encoding == BILOU) for (size_t n = 0; n < ec.size(); n++) { - MULTICLASS::label_t ylab = ec[n]->l.multi; + MULTICLASS::label_t ylab = ec[n]->l.multi(); ylab.label = bilou_to_bio(ylab.label); } } @@ -213,7 +211,7 @@ void run(Search::search& sch, multi_ex& ec) action last_prediction = 1; for (size_t i = 0; i < ec.size(); i++) { - action oracle = ec[i]->l.multi.label; + action oracle = ec[i]->l.multi().label; size_t len = y_allowed->size(); P.set_tag((ptag)i + 1); P.set_learner_id(pass - 1); @@ -286,7 +284,7 @@ void run(Search::search& sch, multi_ex& ec) Search::predictor P(sch, (ptag)0); for (size_t i = 0; i < ec.size(); i++) { - action oracle = ec[i]->l.multi.label; + action oracle = ec[i]->l.multi().label; for (size_t k = 0; k < K; k++) costs[k] = 1.; costs[oracle - 1] = 0.; size_t prediction = P.set_tag((ptag)i + 1) @@ -343,12 +341,12 @@ void run(Search::search& sch, multi_ex& ec) uint32_t max_prediction = 1; uint32_t max_label = 1; - for (size_t i = 0; i < ec.size(); i++) max_label = std::max(ec[i]->l.multi.label, max_label); + for (size_t i = 0; i < ec.size(); i++) max_label = std::max(ec[i]->l.multi().label, max_label); for (ptag i = 0; i < ec.size(); i++) { // labels should be 1 or 2, and our output is MAX of all predicted values - uint32_t oracle = D.predict_max ? max_label : ec[i]->l.multi.label; + uint32_t oracle = D.predict_max ? 
max_label : ec[i]->l.multi().label; uint32_t prediction = sch.predict(*ec[i], i + 1, &oracle, 1, &i, "p"); max_prediction = std::max(prediction, max_prediction); @@ -378,12 +376,12 @@ void initialize(Search::search& sch, size_t& num_actions, options_i& /*options*/ { CS::wclass default_wclass = {0., 0, 0., 0.}; - example* ldf_examples = VW::alloc_examples(sizeof(CS::label), num_actions); + example* ldf_examples = VW::alloc_examples(num_actions); for (size_t a = 0; a < num_actions; a++) { - CS::label& lab = ldf_examples[a].l.cs; - CS::cs_label.default_label(&lab); - lab.costs.push_back(default_wclass); + auto& l = ldf_examples[a].l; + CS::cs_label.default_label(l); + l.cs().costs.push_back(default_wclass); ldf_examples[a].interactions = &sch.get_vw_pointer_unsafe().interactions; } @@ -400,7 +398,8 @@ void initialize(Search::search& sch, size_t& num_actions, options_i& /*options*/ void finish(Search::search& sch) { task_data* data = sch.get_task_data(); - for (size_t a = 0; a < data->num_actions; a++) VW::dealloc_example(CS::cs_label.delete_label, data->ldf_examples[a]); + for (size_t a = 0; a < data->num_actions; a++) + data->ldf_examples[a].~example(); free(data->ldf_examples); free(data); } @@ -430,7 +429,7 @@ void run(Search::search& sch, multi_ex& ec) } // regardless of whether the example is needed or not, the class info is needed - CS::label& lab = data->ldf_examples[a].l.cs; + CS::label& lab = data->ldf_examples[a].l.cs(); // need to tell search what the action id is, so that it can add history features correctly! lab.costs[0].x = 0.; lab.costs[0].class_index = a + 1; @@ -438,7 +437,7 @@ void run(Search::search& sch, multi_ex& ec) lab.costs[0].wap_value = 0.; } - action oracle = ec[i]->l.multi.label - 1; + action oracle = ec[i]->l.multi().label - 1; action pred_id = P.set_tag((ptag)(i + 1)) .set_input(data->ldf_examples, data->num_actions) .set_oracle(oracle) diff --git a/vowpalwabbit/sender.cc b/vowpalwabbit/sender.cc index e37196fa8c6..a00df3e98c5 100644 --- a/vowpalwabbit/sender.cc +++ b/vowpalwabbit/sender.cc @@ -38,8 +38,6 @@ struct sender ~sender() { - buf->files.delete_v(); - buf->space.delete_v(); free(delay_ring); delete buf; } @@ -72,21 +70,22 @@ void receive_result(sender& s) get_prediction(s.sd, res, weight); example& ec = *s.delay_ring[s.received_index++ % s.all->p->ring_size]; - ec.pred.scalar = res; + ec.pred.scalar() = res; - label_data& ld = ec.l.simple; - ec.loss = s.all->loss->getLoss(s.all->sd, ec.pred.scalar, ld.label) * ec.weight; + label_data& ld = ec.l.simple(); + ec.loss = s.all->loss->getLoss(s.all->sd, ec.pred.scalar(), ld.label) * ec.weight; - return_simple_example(*(s.all), nullptr, ec); + return_simple_example_explicit(*(s.all), ec); } void learn(sender& s, LEARNER::single_learner&, example& ec) { + assert(ec.pred.get_type() == prediction_type_t::scalar); if (s.received_index + s.all->p->ring_size / 2 - 1 == s.sent_index) receive_result(s); - s.all->set_minmax(s.all->sd, ec.l.simple.label); - s.all->p->lp.cache_label(&ec.l, *s.buf); // send label information. + s.all->set_minmax(s.all->sd, ec.l.simple().label); + s.all->p->lp.cache_label(ec.l, *s.buf); // send label information. 
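Note: the ec.pred.scalar -> ec.pred.scalar() and ec.l.simple -> ec.l.simple() rewrites that dominate this diff replace raw union member access with checked accessors on a tagged union, and the assert added to sender's learn() above makes the expected variant explicit at the reduction boundary. A compressed sketch of the shape this implies (hypothetical, simplified type; the real polyprediction carries many more variants and matching init_as_* initializers):

#include <cassert>

enum class prediction_type_t { unset, scalar, action_probs };

struct polyprediction_sketch
{
  float& init_as_scalar()
  {
    assert(_type == prediction_type_t::unset);  // may only initialize from the unset state
    _type = prediction_type_t::scalar;
    return _scalar = 0.f;
  }
  float& scalar()
  {
    assert(_type == prediction_type_t::scalar);  // wrong-variant access is a programming error
    return _scalar;
  }
  prediction_type_t get_type() const { return _type; }

 private:
  prediction_type_t _type = prediction_type_t::unset;
  float _scalar = 0.f;  // the real type holds a union over all prediction variants
};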
cache_tag(*s.buf, ec.tag); send_features(s.buf, ec, (uint32_t)s.all->parse_mask); s.delay_ring[s.sent_index++ % s.all->p->ring_size] = &ec; @@ -124,5 +123,6 @@ LEARNER::base_learner* sender_setup(options_i& options, vw& all) LEARNER::learner& l = init_learner(s, learn, learn, 1); l.set_finish_example(finish_example); l.set_end_examples(end_examples); + l.label_type = label_type_t::simple; return make_base(l); } diff --git a/vowpalwabbit/shared_feature_merger.cc b/vowpalwabbit/shared_feature_merger.cc index ce69ec052a2..2a347468c30 100644 --- a/vowpalwabbit/shared_feature_merger.cc +++ b/vowpalwabbit/shared_feature_merger.cc @@ -42,7 +42,8 @@ void predict_or_learn(sfm_data&, LEARNER::multi_learner& base, multi_ex& ec_seq) multi_ex::value_type shared_example = nullptr; - const bool has_example_header = CB::ec_is_example_header(*ec_seq[0]); + const bool has_example_header = CB::ec_is_example_header(*ec_seq[0]) + || COST_SENSITIVE::ec_is_example_header(*ec_seq[0]); if (has_example_header) { shared_example = ec_seq[0]; @@ -75,7 +76,7 @@ LEARNER::base_learner* shared_feature_merger_setup(config::options_i& options, v auto* base = LEARNER::as_multiline(setup_base(options, all)); auto& learner = LEARNER::init_learner(data, base, predict_or_learn, predict_or_learn); - + learner.label_type = base->label_type; // TODO: Incorrect feature numbers will be reported without merging the example namespaces from the // shared example in a finish_example function. However, its too expensive to perform the full operation. diff --git a/vowpalwabbit/simple_label.cc b/vowpalwabbit/simple_label.cc index f647efa43e2..539858f4975 100644 --- a/vowpalwabbit/simple_label.cc +++ b/vowpalwabbit/simple_label.cc @@ -6,32 +6,32 @@ #include #include #include - +#include "vw_string_view.h" #include "cache.h" #include "accumulate.h" #include "best_constant.h" #include "vw_string_view.h" -char* bufread_simple_label(shared_data* sd, label_data* ld, char* c) +char* bufread_simple_label(shared_data* sd, label_data& ld, char* c) { - memcpy(&ld->label, c, sizeof(ld->label)); - // std::cout << ld->label << " " << sd->is_more_than_two_labels_observed << " " << sd->first_observed_label << + memcpy(&ld.label, c, sizeof(ld.label)); + // std::cout << ld.label << " " << sd->is_more_than_two_labels_observed << " " << sd->first_observed_label << // std::endl; - c += sizeof(ld->label); - memcpy(&ld->weight, c, sizeof(ld->weight)); - c += sizeof(ld->weight); - memcpy(&ld->initial, c, sizeof(ld->initial)); - c += sizeof(ld->initial); + c += sizeof(ld.label); + memcpy(&ld.weight, c, sizeof(ld.weight)); + c += sizeof(ld.weight); + memcpy(&ld.initial, c, sizeof(ld.initial)); + c += sizeof(ld.initial); - count_label(sd, ld->label); + count_label(sd, ld.label); return c; } -size_t read_cached_simple_label(shared_data* sd, void* v, io_buf& cache) +size_t read_cached_simple_label(shared_data* sd, polylabel& in_ld, io_buf& cache) { - label_data* ld = (label_data*)v; + auto& ld = in_ld.simple(); char* c; - size_t total = sizeof(ld->label) + sizeof(ld->weight) + sizeof(ld->initial); + size_t total = sizeof(ld.label) + sizeof(ld.weight) + sizeof(ld.initial); if (cache.buf_read(c, total) < total) return 0; bufread_simple_label(sd, ld, c); @@ -39,91 +39,99 @@ size_t read_cached_simple_label(shared_data* sd, void* v, io_buf& cache) return total; } -float get_weight(void* v) -{ - label_data* ld = (label_data*)v; - return ld->weight; -} +float get_weight(polylabel& v) { return v.simple().weight; } -char* bufcache_simple_label(label_data* ld, char* c) +char* 
bufcache_simple_label(label_data& ld, char* c) { - memcpy(c, &ld->label, sizeof(ld->label)); - c += sizeof(ld->label); - memcpy(c, &ld->weight, sizeof(ld->weight)); - c += sizeof(ld->weight); - memcpy(c, &ld->initial, sizeof(ld->initial)); - c += sizeof(ld->initial); + memcpy(c, &ld.label, sizeof(ld.label)); + c += sizeof(ld.label); + memcpy(c, &ld.weight, sizeof(ld.weight)); + c += sizeof(ld.weight); + memcpy(c, &ld.initial, sizeof(ld.initial)); + c += sizeof(ld.initial); return c; } -void cache_simple_label(void* v, io_buf& cache) +void cache_simple_label(polylabel& v, io_buf& cache) { char* c; - label_data* ld = (label_data*)v; - cache.buf_write(c, sizeof(ld->label) + sizeof(ld->weight) + sizeof(ld->initial)); + auto& ld = v.simple(); + cache.buf_write(c, sizeof(ld.label) + sizeof(ld.weight) + sizeof(ld.initial)); bufcache_simple_label(ld, c); } -void default_simple_label(void* v) +void default_simple_label(polylabel& v) { - label_data* ld = (label_data*)v; + label_data* ld; + if (v.get_type() == label_type_t::unset) + { + ld = &v.init_as_simple(); + } + else if (v.get_type() == label_type_t::simple) + { + ld = &v.simple(); + } + else + { + v.reset(); + ld = &v.init_as_simple(); + } + ld->label = FLT_MAX; ld->weight = 1.; ld->initial = 0.; } -bool test_label(void* v) +bool test_label(polylabel& v) { - label_data* ld = (label_data*)v; - return ld->label == FLT_MAX; + auto& ld = v.simple(); + return ld.label == FLT_MAX; } -void delete_simple_label(void*) {} - -void parse_simple_label(parser*, shared_data* sd, void* v, v_array& words) +void parse_simple_label(parser*, shared_data* sd, polylabel& v, v_array& words) { - label_data* ld = (label_data*)v; + auto& ld = v.simple(); switch (words.size()) { case 0: break; case 1: - ld->label = float_of_string(words[0]); + ld.label = float_of_string(words[0]); break; case 2: - ld->label = float_of_string(words[0]); - ld->weight = float_of_string(words[1]); + ld.label = float_of_string(words[0]); + ld.weight = float_of_string(words[1]); break; case 3: - ld->label = float_of_string(words[0]); - ld->weight = float_of_string(words[1]); - ld->initial = float_of_string(words[2]); + ld.label = float_of_string(words[0]); + ld.weight = float_of_string(words[1]); + ld.initial = float_of_string(words[2]); break; default: std::cout << "Error: " << words.size() << " is too many tokens for a simple label: "; - for (const auto & word : words) std::cout << word; + for (const auto& word : words) std::cout << word; std::cout << std::endl; } - count_label(sd, ld->label); + count_label(sd, ld.label); } label_parser simple_label = {default_simple_label, parse_simple_label, cache_simple_label, read_cached_simple_label, - delete_simple_label, get_weight, nullptr, test_label, sizeof(label_data)}; + polylabel_delete_label, get_weight, polylabel_copy_label, test_label, sizeof(label_data)}; void print_update(vw& all, example& ec) { if (all.sd->weighted_labeled_examples + all.sd->weighted_unlabeled_examples >= all.sd->dump_interval && !all.quiet && !all.bfgs) { - all.sd->print_update(all.holdout_set_off, all.current_pass, ec.l.simple.label, ec.pred.scalar, ec.num_features, + all.sd->print_update(all.holdout_set_off, all.current_pass, ec.l.simple().label, ec.pred.scalar(), ec.num_features, all.progress_add, all.progress_arg); } } void output_and_account_example(vw& all, example& ec) { - label_data ld = ec.l.simple; + label_data ld = ec.l.simple(); all.sd->update(ec.test_only, ld.label != FLT_MAX, ec.loss, ec.weight, ec.num_features); if (ld.label != FLT_MAX && !ec.test_only) @@ 
-133,13 +141,19 @@ void output_and_account_example(vw& all, example& ec) for (size_t i = 0; i < all.final_prediction_sink.size(); i++) { int f = (int)all.final_prediction_sink[i]; - all.print_by_ref(f, ec.pred.scalar, 0, ec.tag); + all.print_by_ref(f, ec.pred.scalar(), 0, ec.tag); } print_update(all, ec); } -void return_simple_example(vw& all, void*, example& ec) +void return_simple_example_explicit(vw& all, example& ec) +{ + output_and_account_example(all, ec); + VW::finish_example(all, ec); +} + +void return_simple_example(vw& all, polylabel&, example& ec) { output_and_account_example(all, ec); VW::finish_example(all, ec); diff --git a/vowpalwabbit/simple_label.h b/vowpalwabbit/simple_label.h index 231e5246918..3bfb8c22373 100644 --- a/vowpalwabbit/simple_label.h +++ b/vowpalwabbit/simple_label.h @@ -4,6 +4,8 @@ #pragma once #include "label_parser.h" +#include + struct example; struct vw; @@ -12,9 +14,13 @@ struct label_data float label; float weight; float initial; + + label_data() : label(FLT_MAX), weight(0.f), initial(0.f) {} + label_data(float label, float weight, float initial) : label(label), weight(weight), initial(initial) {} }; -void return_simple_example(vw& all, void*, example& ec); +void return_simple_example(vw& all, polylabel&, example& ec); +void return_simple_example_explicit(vw& all, example& ec); extern label_parser simple_label; diff --git a/vowpalwabbit/stagewise_poly.cc b/vowpalwabbit/stagewise_poly.cc index f5867ee7421..58dd9cc98f8 100644 --- a/vowpalwabbit/stagewise_poly.cc +++ b/vowpalwabbit/stagewise_poly.cc @@ -75,8 +75,6 @@ struct stagewise_poly cout << "total feature number (after poly expansion!) = " << sum_sparsity << std::endl; #endif // DEBUG - //synth_ec.feature_space[tree_atomics].delete_v(); - synth_ec.indices.delete_v(); free(sd); free(depthsbits); } @@ -503,12 +501,12 @@ void predict(stagewise_poly &poly, single_learner &base, example &ec) base.predict(poly.synth_ec); ec.partial_prediction = poly.synth_ec.partial_prediction; ec.updated_prediction = poly.synth_ec.updated_prediction; - ec.pred.scalar = poly.synth_ec.pred.scalar; + ec.pred.scalar() = poly.synth_ec.pred.scalar(); } void learn(stagewise_poly &poly, single_learner &base, example &ec) { - bool training = poly.all->training && ec.l.simple.label != FLT_MAX; + bool training = poly.all->training && ec.l.simple().label != FLT_MAX; poly.original_ec = &ec; if (training) @@ -523,7 +521,7 @@ void learn(stagewise_poly &poly, single_learner &base, example &ec) base.learn(poly.synth_ec); ec.partial_prediction = poly.synth_ec.partial_prediction; ec.updated_prediction = poly.synth_ec.updated_prediction; - ec.pred.scalar = poly.synth_ec.pred.scalar; + ec.pred.scalar() = poly.synth_ec.pred.scalar(); if (ec.example_counter // following line is to avoid repeats when multiple reductions on same example. 
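Note: the new default_simple_label above is the template for how label defaulting works against a typed polylabel: reuse the active variant when it already matches, otherwise destroy whatever is live and re-initialize. Condensed sketch against the polylabel interface this diff introduces (ensure_simple is an invented helper name, not part of the PR):

label_data& ensure_simple(polylabel& v)
{
  if (v.get_type() == label_type_t::simple)
    return v.simple();        // already the right variant: reuse in place
  if (v.get_type() != label_type_t::unset)
    v.reset();                // destroy whichever variant is currently live
  return v.init_as_simple();  // construct a fresh simple label
}

default_simple_label then writes FLT_MAX / 1. / 0. into the returned label_data, preserving the old defaults.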
@@ -656,7 +654,7 @@ void save_load(stagewise_poly &poly, io_buf &model_file, bool read, bool text) //#endif //DEBUG } -base_learner *stagewise_poly_setup(options_i &options, vw &all) +base_learner* stagewise_poly_setup(options_i &options, vw &all) { auto poly = scoped_calloc_or_throw(); bool stage_poly = false; @@ -696,10 +694,12 @@ base_learner *stagewise_poly_setup(options_i &options, vw &all) poly->original_ec = nullptr; poly->next_batch_sz = poly->batch_sz; + poly->synth_ec.pred.init_as_scalar(); + learner &l = init_learner(poly, as_singleline(setup_base(options, all)), learn, predict); l.set_save_load(save_load); l.set_finish_example(finish_example); l.set_end_pass(end_pass); - + l.label_type = label_type_t::simple; return make_base(l); } diff --git a/vowpalwabbit/svrg.cc b/vowpalwabbit/svrg.cc index ef4cf03d3ec..0425c84a8c0 100644 --- a/vowpalwabbit/svrg.cc +++ b/vowpalwabbit/svrg.cc @@ -44,7 +44,7 @@ inline void vec_add(float& p, const float x, float& w) template inline float inline_predict(vw& all, example& ec) { - float acc = ec.l.simple.initial; + float acc = ec.l.simple().initial; GD::foreach_feature >(all, ec, acc); return acc; } @@ -59,12 +59,12 @@ float predict_stable(const svrg& s, example& ec) void predict(svrg& s, single_learner&, example& ec) { ec.partial_prediction = inline_predict(*s.all, ec); - ec.pred.scalar = GD::finalize_prediction(s.all->sd, ec.partial_prediction); + ec.pred.scalar() = GD::finalize_prediction(s.all->sd, ec.partial_prediction); } float gradient_scalar(const svrg& s, const example& ec, float pred) { - return s.all->loss->first_derivative(s.all->sd, pred, ec.l.simple.label) * ec.weight; + return s.all->loss->first_derivative(s.all->sd, pred, ec.l.simple().label) * ec.weight; } // -- Updates, taking inner steps vs. accumulating a full gradient -- @@ -93,7 +93,7 @@ void update_inner(const svrg& s, example& ec) { update u; // |ec| already has prediction according to inner weights. 
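Note: stagewise_poly (above) shows the two obligations this refactor puts on a reduction that owns a synthetic example: the prediction slot must be put into the expected variant before the base learner touches it (poly->synth_ec.pred.init_as_scalar() in setup), and the learner must declare the label variant it consumes (l.label_type = label_type_t::simple). A toy model of why the first matters, using mock types rather than the real VW ones:

#include <cassert>

enum class pred_t { unset, scalar };

struct pred_slot
{
  pred_t type = pred_t::unset;
  float value = 0.f;
  void init_as_scalar() { assert(type == pred_t::unset); type = pred_t::scalar; }
  float& scalar() { assert(type == pred_t::scalar); return value; }
};

struct base_learner_mock
{
  void predict(pred_slot& p) { p.scalar() = 0.5f; }  // writes through the checked accessor
};

int main()
{
  pred_slot synth;
  base_learner_mock base;
  // base.predict(synth);   // would assert: the scalar variant was never initialized
  synth.init_as_scalar();   // what stagewise_poly_setup now does once, up front
  base.predict(synth);      // fine: reads and writes the scalar variant
}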
- u.g_scalar_inner = gradient_scalar(s, ec, ec.pred.scalar); + u.g_scalar_inner = gradient_scalar(s, ec, ec.pred.scalar()); u.g_scalar_stable = gradient_scalar(s, ec, predict_stable(s, ec)); u.eta = s.all->eta; u.norm = (float)s.stable_grad_count; @@ -190,5 +190,6 @@ base_learner* svrg_setup(options_i& options, vw& all) all.weights.stride_shift(2); learner& l = init_learner(s, learn, predict, UINT64_ONE << all.weights.stride_shift()); l.set_save_load(save_load); + l.label_type = label_type_t::simple; return make_base(l); } diff --git a/vowpalwabbit/topk.cc b/vowpalwabbit/topk.cc index 959b0867424..9d88e21f38e 100644 --- a/vowpalwabbit/topk.cc +++ b/vowpalwabbit/topk.cc @@ -43,7 +43,7 @@ void VW::topk::predict(LEARNER::single_learner& base, multi_ex& ec_seq) for (auto ec : ec_seq) { base.predict(*ec); - update_priority_queue(ec->pred.scalar, ec->tag); + update_priority_queue(ec->pred.scalar(), ec->tag); } } @@ -52,7 +52,7 @@ void VW::topk::learn(LEARNER::single_learner& base, multi_ex& ec_seq) for (auto ec : ec_seq) { base.learn(*ec); - update_priority_queue(ec->pred.scalar, ec->tag); + update_priority_queue(ec->pred.scalar(), ec->tag); } } @@ -101,7 +101,7 @@ void print_result(int file_descriptor, std::pairupdate(ec.test_only, ld.label != FLT_MAX, ec.loss, ec.weight, ec.num_features); if (ld.label != FLT_MAX) @@ -142,6 +142,6 @@ LEARNER::base_learner* topk_setup(options_i& options, vw& all) LEARNER::learner& l = init_learner(data, as_singleline(setup_base(options, all)), predict_or_learn, predict_or_learn); l.set_finish_example(finish_example); - + l.label_type = label_type_t::simple; return make_base(l); } diff --git a/vowpalwabbit/util.h b/vowpalwabbit/util.h new file mode 100644 index 00000000000..ec1abdbca9f --- /dev/null +++ b/vowpalwabbit/util.h @@ -0,0 +1,20 @@ +#pragma once + +#include "example.h" +#include "prediction.h" + +inline void swap_to_scores(multi_ex& examples) +{ + for (auto& ex : examples) + { + ex->pred.reinterpret(prediction_type_t::action_scores); + } +} + +inline void swap_to_probs(multi_ex& examples) +{ + for (auto& ex : examples) + { + ex->pred.reinterpret(prediction_type_t::action_probs); + } +} \ No newline at end of file diff --git a/vowpalwabbit/v_array.h b/vowpalwabbit/v_array.h index 48b4ec9a72b..feb795a54a3 100644 --- a/vowpalwabbit/v_array.h +++ b/vowpalwabbit/v_array.h @@ -22,12 +22,28 @@ #endif #include "memory.h" +#include "future_compat.h" const size_t erase_point = ~((1u << 10u) - 1u); template struct v_array { + private: + void delete_v_array() + { + if (_begin != nullptr) + { + for (T* item = _begin; item != _end; ++item) item->~T(); + free(_begin); + } + _begin = nullptr; + _end = nullptr; + end_array = nullptr; + erase_count = 0; + } + + // private: T* _begin; T* _end; @@ -46,12 +62,50 @@ struct v_array inline T* cbegin() const { return _begin; } inline T* cend() const { return _end; } - // v_array cannot have a user-defined constructor, because it participates in various unions. - // union members cannot have user-defined constructors. 
- // v_array() : _begin(nullptr), _end(nullptr), end_array(nullptr), erase_count(0) {} - // ~v_array() { - // delete_v(); - // } + v_array() : _begin(nullptr), _end(nullptr), end_array(nullptr), erase_count(0) {} + ~v_array() { delete_v_array(); } + + v_array(v_array&& other) + { + erase_count = 0; + _begin = nullptr; + _end = nullptr; + end_array = nullptr; + + std::swap(_begin, other._begin); + std::swap(_end, other._end); + std::swap(end_array, other.end_array); + std::swap(erase_count, other.erase_count); + } + + v_array& operator=(v_array&& other) + { + delete_v_array(); + std::swap(_begin, other._begin); + std::swap(_end, other._end); + std::swap(end_array, other.end_array); + std::swap(erase_count, other.erase_count); + return *this; + } + + v_array(const v_array& other) + { + _begin = nullptr; + _end = nullptr; + end_array = nullptr; + erase_count = 0; + + // TODO this should use the other version when T is trivially copyable and this otherwise. + copy_array_no_memcpy(*this, other); + } + + v_array& operator=(const v_array& other) + { + delete_v_array(); + copy_array_no_memcpy(*this, other); + return *this; + } + T last() const { return *(_end - 1); } T pop() { return *(--_end); } bool empty() const { return _begin == _end; } @@ -93,14 +147,10 @@ struct v_array for (T* item = _begin; item != _end; ++item) item->~T(); _end = _begin; } - void delete_v() - { - if (_begin != nullptr) - { - for (T* item = _begin; item != _end; ++item) item->~T(); - free(_begin); - } - _begin = _end = end_array = nullptr; + + VW_DEPRECATED("delete_v is no longer supported. Use the destructor of the object to clean up.") + void delete_v() { + delete_v_array(); } void push_back(const T& new_ele) { @@ -109,7 +159,15 @@ struct v_array new (_end++) T(new_ele); } + void push_back(T&& new_ele) + { + if (_end == end_array) + resize(2 * (end_array - _begin) + 3); + new (_end++) T(std::move(new_ele)); + } + void push_back_unchecked(const T& new_ele) { new (_end++) T(new_ele); } + void push_back_unchecked(T&& new_ele) { new (_end++) T(std::move(new_ele)); } template void emplace_back(Args&&... args) @@ -180,12 +238,26 @@ struct v_array return false; } + + template + friend void copy_array(v_array& dst, const v_array& src); + template + friend void copy_array_no_memcpy(v_array& dst, const v_array& src); + template + friend void copy_array(v_array& dst, const v_array& src, U (*copy_item)(U&)); + template + friend void push_many(v_array& v, const U* _begin, size_t num); + template + friend void calloc_reserve(v_array& v, size_t length); + + friend class io_buf; }; template +VW_DEPRECATED("v_init is no longer supported, use the constructor.") inline v_array v_init() { - return {nullptr, nullptr, nullptr, 0}; + return v_array(); } template @@ -232,18 +304,20 @@ void calloc_reserve(v_array& v, size_t length) } template +VW_DEPRECATED("This performs a copy return and is no longer possible. 
Need to work out a better way here.") v_array pop(v_array >& stack) { - if (stack._end != stack._begin) - return *(--stack._end); + if (stack.end() != stack.begin()) + return *(--stack.end()); else return v_array(); } template +VW_DEPRECATED("Use std::find") bool v_array_contains(v_array& A, T x) { - for (T* e = A._begin; e != A._end; ++e) + for (T* e = A.begin(); e != A.end(); ++e) if (*e == x) return true; return false; @@ -253,7 +327,7 @@ template std::ostream& operator<<(std::ostream& os, const v_array& v) { os << '['; - for (T* i = v._begin; i != v._end; ++i) os << ' ' << *i; + for (const T* i = v.begin(); i != v.end(); ++i) os << ' ' << *i; os << " ]"; return os; } @@ -262,24 +336,7 @@ template std::ostream& operator<<(std::ostream& os, const v_array >& v) { os << '['; - for (std::pair* i = v._begin; i != v._end; ++i) os << ' ' << i->first << ':' << i->second; + for (const std::pair* i = v.begin(); i != v.end(); ++i) os << ' ' << i->first << ':' << i->second; os << " ]"; return os; } - -typedef v_array v_string; - -inline v_string string2v_string(const std::string& s) -{ - v_string res = v_init(); - if (!s.empty()) - push_many(res, (unsigned char*)s.data(), s.size()); - return res; -} - -inline std::string v_string2string(const v_string& v_s) -{ - std::string res; - for (unsigned char* i = v_s._begin; i != v_s._end; ++i) res.push_back(*i); - return res; -} diff --git a/vowpalwabbit/v_array_pool.h b/vowpalwabbit/v_array_pool.h deleted file mode 100644 index f0e615b4217..00000000000 --- a/vowpalwabbit/v_array_pool.h +++ /dev/null @@ -1,26 +0,0 @@ -// Copyright (c) by respective owners including Yahoo!, Microsoft, and -// individual contributors. All rights reserved. Released under a BSD (revised) -// license as described in the file LICENSE. - -#pragma once - -#include "v_array.h" -#include "object_pool.h" - -namespace VW -{ -template -struct v_array_allocator -{ - v_array operator()() { return v_init(); } -}; - -template -struct v_array_deleter -{ - void operator()(v_array& array) { array.delete_v(); } -}; - -template -using v_array_pool = VW::value_object_pool, v_array_allocator, v_array_deleter>; -} // namespace VW \ No newline at end of file diff --git a/vowpalwabbit/vw.h b/vowpalwabbit/vw.h index ede377c1c08..570d4758574 100644 --- a/vowpalwabbit/vw.h +++ b/vowpalwabbit/vw.h @@ -84,8 +84,13 @@ example* import_example(vw& all, const std::string& label, primitive_feature_spa // thus any delay introduced when freeing examples must be at least as long as the one // introduced by all.l->finish_example implementations. // e.g. multiline examples as used by cb_adf must not be released before the finishing newline example. 
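Note on the vw.h changes that follow: with labels and predictions now self-managing, example no longer needs label-size bookkeeping or a caller-supplied deleter, so alloc_examples(size_t, size_t) and dealloc_example(...) are deprecated in favor of ordinary construction and destruction (the warm_cb destructor later in this diff already uses the explicit-destructor form). Sketch of the migration, assuming example is default-constructible and its destructor tears down any live variants, which is the point of this PR:

// Before: label size threaded through allocation, label-specific deleter at teardown.
example* ec = VW::alloc_examples(CB::cb_label.label_size, 1);
VW::dealloc_example(CB::cb_label.delete_label, *ec);
free(ec);

// After: transitional deprecated overload; the destructor knows how to clean up...
example* ec2 = VW::alloc_examples(1);
ec2->~example();
free(ec2);

// ...or, per the deprecation messages, simply:
example* ec3 = new example;
delete ec3;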
+VW_DEPRECATED("Do not need to specify label size, use new instead") example* alloc_examples(size_t, size_t); -void dealloc_example(void (*delete_label)(void*), example& ec, void (*delete_prediction)(void*) = nullptr); +VW_DEPRECATED("Use new instead") +example* alloc_examples(size_t); + +VW_DEPRECATED("Examples can simply be deleted now.") +void dealloc_example(void (*delete_label)(polylabel&), example& ec, void (*delete_prediction)(void*) = nullptr); void parse_example_label(vw& all, example& ec, std::string label); void setup_examples(vw& all, v_array& examples); @@ -117,7 +122,8 @@ void finish_example(vw& all, example& ec); void finish_example(vw& all, multi_ex& ec); void empty_example(vw& all, example& ec); -void copy_example_data(bool audit, example*, example*, size_t, void (*copy_label)(void*, void*)); +VW_DEPRECATED("Copy the label object directly.") +void copy_example_data(bool audit, example*, example*, size_t, void (*copy_label)(polylabel&, polylabel&)); void copy_example_metadata(bool audit, example*, example*); void copy_example_data(bool audit, example*, example*); // metadata + features, don't copy the label void move_feature_namespace(example* dst, example* src, namespace_index c); diff --git a/vowpalwabbit/vw.vcxproj b/vowpalwabbit/vw.vcxproj index f5f0bc0a3d5..848aabe70ff 100644 --- a/vowpalwabbit/vw.vcxproj +++ b/vowpalwabbit/vw.vcxproj @@ -1,4 +1,4 @@ - + @@ -150,4 +150,4 @@ - \ No newline at end of file + diff --git a/vowpalwabbit/vw_core.vcxproj b/vowpalwabbit/vw_core.vcxproj index 5490691c009..3a1169411e9 100644 --- a/vowpalwabbit/vw_core.vcxproj +++ b/vowpalwabbit/vw_core.vcxproj @@ -28,7 +28,7 @@ v141 $(MSBuildProjectDirectory)\..\sdl\SDL-7.0-Recommended.ruleset - true + false 10.0.16299.0 @@ -159,6 +159,7 @@ + @@ -192,7 +193,7 @@ - + @@ -275,6 +276,7 @@ + diff --git a/vowpalwabbit/warm_cb.cc b/vowpalwabbit/warm_cb.cc index b979e1cb237..88cc10e4a5e 100644 --- a/vowpalwabbit/warm_cb.cc +++ b/vowpalwabbit/warm_cb.cc @@ -11,6 +11,7 @@ #include "hash.h" #include "explore.h" #include "vw_exception.h" +#include "util.h" #include #include @@ -85,31 +86,19 @@ struct warm_cb ~warm_cb() { - CB::cb_label.delete_label(&cb_label); - a_s.delete_v(); + delete[] csls; + delete[] cbls; - for (size_t a = 0; a < num_actions; ++a) + for (auto& ex : ecs) { - COST_SENSITIVE::cs_label.delete_label(&csls[a]); + ex->~example(); + free(ex); } - free(csls); - free(cbls); - for (size_t a = 0; a < num_actions; ++a) + for (auto& ex : ws_vali) { - ecs[a]->pred.a_s.delete_v(); - VW::dealloc_example(CB::cb_label.delete_label, *ecs[a]); - free_it(ecs[a]); - } - - a_s_adf.delete_v(); - for (size_t i = 0; i < ws_vali.size(); ++i) - { - if (use_cs) - VW::dealloc_example(COST_SENSITIVE::cs_label.delete_label, *ws_vali[i]); - else - VW::dealloc_example(MULTICLASS::mc_label.delete_label, *ws_vali[i]); - free(ws_vali[i]); + ex->~example(); + free(ex); } } }; @@ -137,20 +126,10 @@ float loss_cs(warm_cb& data, v_array& costs, uint32_t fi } template -uint32_t find_min(std::vector arr) +uint32_t find_min(std::vector& arr) { - T min_val = FLT_MAX; - uint32_t argmin = 0; - - for (uint32_t i = 0; i < arr.size(); i++) - { - if (arr[i] < min_val) - { - min_val = arr[i]; - argmin = i; - } - } - return argmin; + auto min = std::min_element(arr.begin(), arr.end()); + return static_cast(std::distance(arr.begin(), min)); } void finish(warm_cb& data) @@ -175,8 +154,12 @@ void copy_example_to_adf(warm_cb& data, example& ec) { auto& eca = *data.ecs[a]; // clear label - auto& lab = eca.l.cb; - 
CB::cb_label.default_label(&lab); + CB::default_label(eca.l.cb()); + if (eca.pred.get_type() != prediction_type_t::action_probs) + { + eca.pred.reset(); + eca.pred.init_as_action_probs(); + } // copy data VW::copy_example_data(false, &eca, &ec); @@ -191,7 +174,7 @@ void copy_example_to_adf(warm_cb& data, example& ec) } // avoid empty example by adding a tag (hacky) - if (CB_ALGS::example_is_newline_not_header(eca) && CB::cb_label.test_label(&eca.l)) + if (CB_ALGS::example_is_newline_not_header(eca) && CB::cb_label.test_label(eca.l)) { eca.tag.push_back('n'); } @@ -308,7 +291,7 @@ uint32_t predict_sublearner_adf(warm_cb& data, multi_learner& base, example& ec, { copy_example_to_adf(data, ec); base.predict(data.ecs, i); - return data.ecs[0]->pred.a_s[0].action + 1; + return data.ecs[0]->pred.action_probs()[0].action + 1; } void accumu_costs_iv_adf(warm_cb& data, multi_learner& base, example& ec) @@ -328,13 +311,9 @@ template void add_to_vali(warm_cb& data, example& ec) { // TODO: set the first parameter properly - example* ec_copy = VW::alloc_examples(sizeof(polylabel), 1); - - if (use_cs) - VW::copy_example_data(false, ec_copy, &ec, 0, COST_SENSITIVE::cs_label.copy_label); - else - VW::copy_example_data(false, ec_copy, &ec, 0, MULTICLASS::mc_label.copy_label); - + example* ec_copy = VW::alloc_examples(1); + // Label copy is automatic now -> hence the nullptr + VW::copy_example_data(false, ec_copy, &ec, 0, nullptr); data.ws_vali.push_back(ec_copy); } @@ -355,19 +334,22 @@ void learn_sup_adf(warm_cb& data, example& ec, int ec_type) { csls[a].costs[0].class_index = a + 1; if (use_cs) - csls[a].costs[0].x = loss_cs(data, ec.l.cs.costs, a + 1); + csls[a].costs[0].x = loss_cs(data, ec.l.cs().costs, a + 1); else - csls[a].costs[0].x = loss(data, ec.l.multi.label, a + 1); + csls[a].costs[0].x = loss(data, ec.l.multi().label, a + 1); } for (size_t a = 0; a < data.num_actions; ++a) { - cbls[a] = data.ecs[a]->l.cb; - data.ecs[a]->l.cs = csls[a]; + cbls[a] = std::move(data.ecs[a]->l.cb()); + data.ecs[a]->l.reset(); + data.ecs[a]->l.init_as_cs(std::move(csls[a])); } std::vector old_weights; for (size_t a = 0; a < data.num_actions; ++a) old_weights.push_back(data.ecs[a]->weight); + swap_to_scores(data.ecs); + for (uint32_t i = 0; i < data.choices_lambda; i++) { float weight_multiplier = compute_weight_multiplier(data, i, ec_type); @@ -376,9 +358,16 @@ void learn_sup_adf(warm_cb& data, example& ec, int ec_type) cs_learner->learn(data.ecs, i); } + swap_to_probs(data.ecs); + for (size_t a = 0; a < data.num_actions; ++a) data.ecs[a]->weight = old_weights[a]; - for (size_t a = 0; a < data.num_actions; ++a) data.ecs[a]->l.cb = cbls[a]; + for (size_t a = 0; a < data.num_actions; ++a) + { + csls[a] = std::move(data.ecs[a]->l.cs()); + data.ecs[a]->l.reset(); + data.ecs[a]->l.init_as_cb(std::move(cbls[a])); + } } template @@ -389,7 +378,7 @@ void predict_or_learn_sup_adf(warm_cb& data, multi_learner& base, example& ec, i if (ind_update(data, ec_type)) learn_sup_adf(data, ec, ec_type); - ec.pred.multiclass = action; + ec.pred.multiclass() = action; } uint32_t predict_bandit_adf(warm_cb& data, multi_learner& base, example& ec) @@ -401,12 +390,12 @@ uint32_t predict_bandit_adf(warm_cb& data, multi_learner& base, example& ec) auto& out_ec = *data.ecs[0]; uint32_t chosen_action; - if (sample_after_normalizing(data.app_seed + data.example_counter++, begin_scores(out_ec.pred.a_s), - end_scores(out_ec.pred.a_s), chosen_action)) + if (sample_after_normalizing(data.app_seed + data.example_counter++, 
begin_scores(out_ec.pred.action_probs()), + end_scores(out_ec.pred.action_probs()), chosen_action)) THROW("Failed to sample from pdf"); auto& a_s = data.a_s_adf; - copy_array(a_s, out_ec.pred.a_s); + copy_array(a_s, out_ec.pred.action_probs()); return chosen_action; } @@ -417,7 +406,7 @@ void learn_bandit_adf(warm_cb& data, multi_learner& base, example& ec, int ec_ty // add cb label to chosen action auto& cl = data.cl_adf; - auto& lab = data.ecs[cl.action - 1]->l.cb; + auto& lab = data.ecs[cl.action - 1]->l.cb(); lab.costs.push_back(cl); std::vector old_weights; @@ -447,9 +436,9 @@ void predict_or_learn_bandit_adf(warm_cb& data, multi_learner& base, example& ec THROW("No action with non-zero probability found!"); if (use_cs) - cl.cost = loss_cs(data, ec.l.cs.costs, cl.action); + cl.cost = loss_cs(data, ec.l.cs().costs, cl.action); else - cl.cost = loss(data, ec.l.multi.label, cl.action); + cl.cost = loss(data, ec.l.multi().label, cl.action); if (ec_type == INTERACTION) accumu_costs_iv_adf(data, base, ec); @@ -457,7 +446,7 @@ void predict_or_learn_bandit_adf(warm_cb& data, multi_learner& base, example& ec if (ind_update(data, ec_type)) learn_bandit_adf(data, base, ec, ec_type); - ec.pred.multiclass = cl.action; + ec.pred.multiclass() = cl.action; } void accumu_var_adf(warm_cb& data, multi_learner& base, example& ec) @@ -477,12 +466,12 @@ void predict_or_learn_adf(warm_cb& data, multi_learner& base, example& ec) { // Corrupt labels (only corrupting multiclass labels as of now) if (use_cs) - data.cs_label = ec.l.cs; + data.cs_label = ec.l.cs(); else { - data.mc_label = ec.l.multi; + data.mc_label = ec.l.multi(); if (data.ws_iter < data.ws_period) - ec.l.multi.label = corrupt_action(data, data.mc_label.label, WARM_START); + ec.l.multi().label = corrupt_action(data, data.mc_label.label, WARM_START); } // Warm start phase @@ -508,14 +497,14 @@ void predict_or_learn_adf(warm_cb& data, multi_learner& base, example& ec) else { ec.weight = 0; - ec.pred.multiclass = 1; + ec.pred.multiclass() = 1; } // Restore the original labels if (use_cs) - ec.l.cs = data.cs_label; + ec.l.cs() = std::move(data.cs_label); else - ec.l.multi = data.mc_label; + ec.l.multi() = data.mc_label; } void init_adf_data(warm_cb& data, const uint32_t num_actions) @@ -528,19 +517,19 @@ void init_adf_data(warm_cb& data, const uint32_t num_actions) data.ecs.resize(num_actions); for (size_t a = 0; a < num_actions; ++a) { - data.ecs[a] = VW::alloc_examples(CB::cb_label.label_size, 1); - auto& lab = data.ecs[a]->l.cb; - CB::cb_label.default_label(&lab); + data.ecs[a] = VW::alloc_examples(1); + auto& lab = data.ecs[a]->l.init_as_cb(); + CB::default_label(lab); } // The rest of the initialization is for warm start CB - data.csls = calloc_or_throw(num_actions); + data.csls = new COST_SENSITIVE::label[num_actions]; for (uint32_t a = 0; a < num_actions; ++a) { - COST_SENSITIVE::cs_label.default_label(&data.csls[a]); + COST_SENSITIVE::default_label(data.csls[a]); data.csls[a].costs.push_back({0, a + 1, 0, 0}); } - data.cbls = calloc_or_throw(num_actions); + data.cbls = new CB::label[num_actions]; data.ws_train_size = data.ws_period; data.ws_vali_size = 0; @@ -614,7 +603,6 @@ base_learner* warm_cb_setup(options_i& options, vw& all) } data->app_seed = uniform_hash("vw", 2, 0); - data->a_s = v_init(); data->all = &all; data->_random_state = all.get_random_state(); data->use_cs = use_cs; @@ -645,14 +633,19 @@ base_learner* warm_cb_setup(options_i& options, vw& all) } if (use_cs) + { l = &init_cost_sensitive_learner( data, base, 
predict_or_learn_adf, predict_or_learn_adf, all.p, data->choices_lambda); + l->label_type = label_type_t::cs; + } else + { l = &init_multiclass_learner( data, base, predict_or_learn_adf, predict_or_learn_adf, all.p, data->choices_lambda); + l->label_type = label_type_t::multi; + } l->set_finish(finish); - all.delete_prediction = nullptr; return make_base(*l); }
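Note, closing out the warm_cb changes: with polylabel now a tagged type, the old union trick of saving one member and assigning another (cbls[a] = ecs[a]->l.cb; ecs[a]->l.cs = csls[a];) is no longer legal, so learn_sup_adf swaps label variants with an explicit move-out / reset / init-as sequence and restores them the same way. Condensed from the diff above (error handling elided):

// Flip each example's label from CB to CS before calling the cost-sensitive learner...
cbls[a] = std::move(data.ecs[a]->l.cb());   // move the live CB variant out
data.ecs[a]->l.reset();                     // destroy the moved-from CB variant
data.ecs[a]->l.init_as_cs(std::move(csls[a]));

// ...and restore afterwards, moving the CS label back out first.
csls[a] = std::move(data.ecs[a]->l.cs());
data.ecs[a]->l.reset();
data.ecs[a]->l.init_as_cb(std::move(cbls[a]));

The swap_to_scores / swap_to_probs helpers from the new util.h handle the prediction side of the same problem more cheaply via pred.reinterpret(), which works because action_scores and action_probs share a single underlying representation.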