From 07c06e4073217763590f4163fbe6ea75b18ea07e Mon Sep 17 00:00:00 2001 From: Hal Daume III Date: Mon, 16 Feb 2015 13:54:26 -0500 Subject: [PATCH 01/13] fix load/save bug in graph search --- Makefile | 4 ++-- vowpalwabbit/search.cc | 9 ++++++--- vowpalwabbit/search_graph.cc | 8 ++++++-- 3 files changed, 14 insertions(+), 7 deletions(-) diff --git a/Makefile b/Makefile index 28a091774ab..8521fc08396 100644 --- a/Makefile +++ b/Makefile @@ -66,8 +66,8 @@ endif FLAGS = -std=c++0x $(CFLAGS) $(LDFLAGS) $(ARCH) $(WARN_FLAGS) $(OPTIM_FLAGS) -D_FILE_OFFSET_BITS=64 -DNDEBUG $(BOOST_INCLUDE) -fPIC #-DVW_LDA_NO_SSE # for profiling -- note that it needs to be gcc -#FLAGS = -std=c++0x $(CFLAGS) $(LDFLAGS) $(ARCH) $(WARN_FLAGS) -O2 -fno-strict-aliasing -ffast-math -D_FILE_OFFSET_BITS=64 $(BOOST_INCLUDE) -pg -fPIC #-DVW_LDA_NO_S -#CXX = g++ +FLAGS = -std=c++0x $(CFLAGS) $(LDFLAGS) $(ARCH) $(WARN_FLAGS) -O2 -fno-strict-aliasing -ffast-math -D_FILE_OFFSET_BITS=64 $(BOOST_INCLUDE) -pg -fPIC #-DVW_LDA_NO_S +CXX = g++ # for valgrind / gdb debugging #FLAGS = -std=c++0x $(CFLAGS) $(LDFLAGS) $(ARCH) $(WARN_FLAGS) -ffast-math -D_FILE_OFFSET_BITS=64 $(BOOST_INCLUDE) -g -O0 -fPIC diff --git a/vowpalwabbit/search.cc b/vowpalwabbit/search.cc index 058d7fcfdb4..b885754fb3c 100644 --- a/vowpalwabbit/search.cc +++ b/vowpalwabbit/search.cc @@ -1134,7 +1134,9 @@ namespace Search { for (size_t i=0; i<condition_on_cnt; i++) - if (cached_action_store_or_find(priv, mytag, condition_on, condition_on_names, priv.condition_on_actions.begin, condition_on_cnt, policy, learner_id, a, false)) + bool not_test = priv.all->training && !ecs[0].test_only; + + if (not_test && cached_action_store_or_find(priv, mytag, condition_on, condition_on_names, priv.condition_on_actions.begin, condition_on_cnt, policy, learner_id, a, false)) // if this succeeded, 'a' has the right action priv.total_cache_hits++; else { // we need to predict, and then cache @@ -1165,7 +1167,8 @@ namespace Search { for (size_t n=start_K; n<ec_cnt; n++) ("search_trained_nb_policies", po::value<size_t>(), "the number of trained policies in a file") ("search_allowed_transitions",po::value<string>(),"read file of allowed transitions [def: all transitions are allowed]") - ("search_subsample_time", po::value<float>(), "instead of training at all timesteps, use a subset. if value in (0,1), train on a random v%. if v>=1, train on precisely v steps per example") + ("search_subsample_time", po::value<float>(), "instead of training at all timesteps, use a subset. if value in (0,1), train on a random v%. if v>=1, train on precisely v steps per example, if v<0, use active learning") ("search_neighbor_features", po::value<string>(), "copy features from neighboring lines. argument looks like: '-1:a,+2' meaning copy previous line namespace a and next next line from namespace _unnamed_, where ',' separates them") ("search_rollout_num_steps", po::value<size_t>(), "how many calls of \"loss\" before we stop really predicting on rollouts and switch to oracle (def: 0 means \"infinite\")") ("search_history_length", po::value<size_t>(), "some tasks allow you to specify how much history their depend on; specify that here [def: 1]")
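The search_graph.cc hunk below is the substance of the load/save fix: when an edge feature is re-hashed against a neighbor's predicted label, the raw index must be masked into the weight table before the stride shift is undone, and masked again after it is reapplied; otherwise indices computed at training time need not match the ones recomputed after a model is loaded. A minimal sketch of the corrected arithmetic, assuming mask and ss come from the model's weight_mask and stride_shift as in the hunk (hypothetical helper, not VW's actual API):

    #include <cstdint>
    const uint32_t salt = 348919043;  // same constant the hunk uses per label copy
    uint32_t neighbor_feature_index(uint32_t fx, size_t k, uint32_t mask, uint32_t ss) {
      uint32_t base = (fx & mask) >> ss;                  // mask first, then un-shift
      return ((base + salt * (uint32_t)k) << ss) & mask;  // re-shift, stay inside the table
    }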
diff --git a/vowpalwabbit/search_graph.cc b/vowpalwabbit/search_graph.cc index 8aa0c349450..cd22cc72323 100644 --- a/vowpalwabbit/search_graph.cc +++ b/vowpalwabbit/search_graph.cc @@ -65,6 +65,7 @@ namespace GraphTask { vector pred; // predictions example*cur_node; // pointer to the current node for add_edge_features_fn float* neighbor_predictions; // prediction on this neighbor for add_edge_features_fn + weight* weight_vector; }; inline bool example_is_test(polylabel&l) { return l.cs.costs.size() == 0; } @@ -136,6 +137,7 @@ namespace GraphTask { D.mask = sch.get_vw_pointer_unsafe().reg.weight_mask; D.ss = sch.get_vw_pointer_unsafe().reg.stride_shift; + D.weight_vector = sch.get_vw_pointer_unsafe().reg.weight_vector; D.N = 0; D.E = 0; @@ -183,7 +185,8 @@ namespace GraphTask { example*node = D.cur_node; for (size_t k=0; k<=D.K; k++) { if (D.neighbor_predictions[k] == 0.) continue; - feature f = { fv * D.neighbor_predictions[k], (uint32_t) (( (fx >> D.ss) + 348919043 * k ) << D.ss) }; + feature f = { fv * D.neighbor_predictions[k], (uint32_t) ((( ((fx & D.mask) >> D.ss) + 348919043 * k ) << D.ss) & D.mask) }; + //cerr << "e: " << fx << " (:= " << ((fx & D.mask) >> D.ss) << ") / " << k << " -> " << f.weight_index << ", w=" << D.weight_vector[f.weight_index] << endl; node->atomics[neighbor_namespace].push_back(f); node->sum_feat_sq[neighbor_namespace] += f.x * f.x; } @@ -193,7 +196,7 @@ namespace GraphTask { void add_edge_features_single_fn(task_data&D, float fv, uint32_t fx) { example*node = D.cur_node; size_t k = (size_t) D.neighbor_predictions[0]; - feature f = { fv, (uint32_t) (( (fx >> D.ss) + 348919043 * k ) << D.ss) }; + feature f = { fv, (uint32_t) (( ((fx & D.mask) >> D.ss) + 348919043 * k ) << D.ss) }; node->atomics[neighbor_namespace].push_back(f); node->sum_feat_sq[neighbor_namespace] += f.x * f.x; // TODO: audit @@ -218,6 +221,7 @@ namespace GraphTask { if (pred_total == 0.) continue;
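The active.cc hunks that follow rework active learning so query_decision takes the precomputed revert weight rather than a whole example, which lets the search code further down ask the same question per timestep. The underlying rule is importance-weighted query sampling; a minimal sketch, assuming get_active_coin_bias and frand48 behave exactly as the surrounding hunks show (simplified from query_decision):

    // Returns an importance weight > 0 if this label should be queried,
    // or -1 to skip it.
    float query_importance(float k, float avg_loss, float revert_weight, float c0) {
      float bias = get_active_coin_bias(k, avg_loss, revert_weight / k, c0);
      if (frand48() < bias) return 1.f / bias;  // queried labels get weight 1/bias
      return -1.f;
    }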
float weighted_queries = (float)(a.all->initial_t + a.all->sd->weighted_examples - a.all->sd->weighted_unlabeled_examples); avg_loss = (float)(a.all->sd->sum_loss/k + sqrt((1.+0.5*log(k))/(weighted_queries+0.0001))); - bias = get_active_coin_bias(k, avg_loss, ec.revert_weight/k, a.active_c0); + bias = get_active_coin_bias(k, avg_loss, ec_revert_weight/k, a.active_c0); } if(frand48() < bias) return 1.f / bias; @@ -50,7 +46,7 @@ float get_active_coin_bias(float k, float avg_loss, float g, float c0) float k = ec.example_t - ec.l.simple.weight; ec.revert_weight = all.loss->getRevertingWeight(all.sd, ec.pred.scalar, all.eta/powf(k,all.power_t)); - float importance = query_decision(a, ec, k); + float importance = query_decision(a, ec.revert_weight, k); if(importance > 0){ all.sd->queries += 1; @@ -111,7 +107,7 @@ float get_active_coin_bias(float k, float avg_loss, float g, float c0) float ai=-1; if(ld.label == FLT_MAX) - ai=query_decision(a, ec, (float)all.sd->weighted_unlabeled_examples); + ai=query_decision(a, ec.revert_weight, (float)all.sd->weighted_unlabeled_examples); all.print(all.raw_prediction, ec.partial_prediction, -1, ec.tag); for (size_t i = 0; i<all.final_prediction_sink.size(); i++) (const pair<size_t,size_t>& a, const pair<size_t,size_t>& b) { return ((a.first == b.first) && (a.second < b.second)) || (a.first < b.first); } void get_training_timesteps(search_private& priv, v_array< pair<size_t,size_t> >& timesteps) { // timesteps are pairs of (beam elem, t) where beam elem == 0 means "default" for non-beam search timesteps.erase(); - + + // if there's active learning, we need to + if (priv.subsample_timesteps <= -1) { + for (size_t t=0; t<priv.ec_seq.size(); t++) { + float k = (float)priv.total_examples_generated; + priv.ec_seq[t]->revert_weight = priv.all->loss->getRevertingWeight(priv.all->sd, priv.ec_seq[t].pred.scalar, priv.all->eta / powf(k, priv.all->power_t)); + float importance = query_decision(active_str, *priv.ec_seq[t], k); + if (importance > 0.) + timesteps.push_back(pair<size_t,size_t>(0,t)); + } + } // if there's no subsampling to do, just return [0,T) - if (priv.subsample_timesteps <= 0) + else if (priv.subsample_timesteps <= 0) for (size_t t=0; t<priv.T; t++) timesteps.push_back(pair<size_t,size_t>(0,t)); @@ -1865,7 +1877,7 @@ namespace Search { ("search_trained_nb_policies", po::value<size_t>(), "the number of trained policies in a file") ("search_allowed_transitions",po::value<string>(),"read file of allowed transitions [def: all transitions are allowed]") - ("search_subsample_time", po::value<float>(), "instead of training at all timesteps, use a subset. if value in (0,1), train on a random v%. if v>=1, train on precisely v steps per example, if v<0, use active learning") + ("search_subsample_time", po::value<float>(), "instead of training at all timesteps, use a subset. if value in (0,1), train on a random v%. if v>=1, train on precisely v steps per example, if v<=-1, use active learning") ("search_neighbor_features", po::value<string>(), "copy features from neighboring lines. 
argument looks like: '-1:a,+2' meaning copy previous line namespace a and next next line from namespace _unnamed_, where ',' separates them") ("search_rollout_num_steps", po::value<size_t>(), "how many calls of \"loss\" before we stop really predicting on rollouts and switch to oracle (def: 0 means \"infinite\")") ("search_history_length", po::value<size_t>(), "some tasks allow you to specify how much history their depend on; specify that here [def: 1]") From 83eda56c1498b4e63bba79decffb8cd878a18882 Mon Sep 17 00:00:00 2001 From: Hal Daume III Date: Tue, 17 Feb 2015 11:59:22 -0500 Subject: [PATCH 03/13] faster search, fewer predictions needed, much faster beam --- Makefile | 4 ++-- test/train-sets/ref/search_er.stderr | 4 ++-- vowpalwabbit/beam.h | 12 +++++----- vowpalwabbit/search.cc | 35 ++++++++++++++++++++++++---- 4 files changed, 40 insertions(+), 15 deletions(-) diff --git a/Makefile b/Makefile index 8521fc08396..28a091774ab 100644 --- a/Makefile +++ b/Makefile @@ -66,8 +66,8 @@ endif FLAGS = -std=c++0x $(CFLAGS) $(LDFLAGS) $(ARCH) $(WARN_FLAGS) $(OPTIM_FLAGS) -D_FILE_OFFSET_BITS=64 -DNDEBUG $(BOOST_INCLUDE) -fPIC #-DVW_LDA_NO_SSE # for profiling -- note that it needs to be gcc -FLAGS = -std=c++0x $(CFLAGS) $(LDFLAGS) $(ARCH) $(WARN_FLAGS) -O2 -fno-strict-aliasing -ffast-math -D_FILE_OFFSET_BITS=64 $(BOOST_INCLUDE) -pg -fPIC #-DVW_LDA_NO_S -CXX = g++ +#FLAGS = -std=c++0x $(CFLAGS) $(LDFLAGS) $(ARCH) $(WARN_FLAGS) -O2 -fno-strict-aliasing -ffast-math -D_FILE_OFFSET_BITS=64 $(BOOST_INCLUDE) -pg -fPIC #-DVW_LDA_NO_S +#CXX = g++ # for valgrind / gdb debugging #FLAGS = -std=c++0x $(CFLAGS) $(LDFLAGS) $(ARCH) $(WARN_FLAGS) -ffast-math -D_FILE_OFFSET_BITS=64 $(BOOST_INCLUDE) -g -O0 -fPIC diff --git a/test/train-sets/ref/search_er.stderr b/test/train-sets/ref/search_er.stderr index 27204d74ce9..a5e0767e786 100644 --- a/test/train-sets/ref/search_er.stderr +++ b/test/train-sets/ref/search_er.stderr @@ -11,7 +11,7 @@ loss last counter output prefix output prefix p 1.000000 1.000000 1 [4 ] [1 ] 0 0 1 0 1 0.000000 2.000000 3.000000 2 [2 4 2 5 10 10 ] [4 4 4 10 10 10 ] 0 0 7 0 7 0.000000 2.875000 3.750000 4 [1 4 4 1 10 10 10 1..] [4 4 4 4 10 10 10 1..] 0 0 32 0 32 0.000000 -1.437500 0.000000 8 [1 4 4 1 10 10 10 1..] [1 4 4 1 10 10 10 1..] 
1 0 64 0 64 0.000001 finished run number of examples per pass = 4 passes used = 3 weighted example sum = 12 weighted label sum = 0 average loss = 0.958333 -total feature number = 522 +total feature number = 1185 diff --git a/vowpalwabbit/beam.h b/vowpalwabbit/beam.h index 71f45544deb..e5a45c701c2 100644 --- a/vowpalwabbit/beam.h +++ b/vowpalwabbit/beam.h @@ -91,16 +91,16 @@ inline int compare_on_hash_then_cost(const void *void_a, const void *void_b) { if (beam_size == 1) do_kbest = false; // automatically turn of kbest } - bool insert(T*data, float cost, uint32_t hash) { // returns TRUE iff element was actually added + bool might_insert(float cost) { bool should_add = false; - if (count < beam_size) should_add = true; else if (cost < worst_cost) should_add = true; if (cost > prune_if_gt) should_add = false; - - //cerr << "insert " << ((size_t)data) << " with cost=" << cost << " wc=" << worst_cost << " count=" << count << " size=" << beam_size << " has should_add=" << should_add << endl; - - if (!should_add) return false; + return should_add; + } + + bool insert(T*data, float cost, uint32_t hash) { // returns TRUE iff element was actually added + if (!might_insert(cost)) return false; //bool we_were_worse = false; // if (is_equivalent) { diff --git a/vowpalwabbit/search.cc b/vowpalwabbit/search.cc index 0c0767919de..ab638bb6b0f 100644 --- a/vowpalwabbit/search.cc +++ b/vowpalwabbit/search.cc @@ -38,7 +38,7 @@ namespace Search { NULL }; // must NULL terminate! const bool PRINT_UPDATE_EVERY_EXAMPLE =0; - const bool PRINT_UPDATE_EVERY_PASS =0; + const bool PRINT_UPDATE_EVERY_PASS =1; const bool PRINT_CLOCK_TIME =0; string neighbor_feature_space("neighbor"); @@ -170,6 +170,7 @@ namespace Search { v_array<action> condition_on_actions; v_array< pair<size_t,size_t> > timesteps; v_array<float> learn_losses; + v_array< pair<float,size_t> > active_uncertainty; LEARNER::base_learner* base_learner; clock_t start_clock_time; @@ -544,6 +545,8 @@ size_t i = (allowed_actions_cnt > 0) ? allowed_actions[j] : j; if (i == ret) continue; + if (! priv.beam->might_insert( alternative_costs[i] )) continue; + action_prefix* px = new action_prefix; *px = v_init<action>(); px->resize(new_len+1); @@ -709,6 +712,18 @@ priv.base_learner->predict(ec, policy); uint32_t act = ec.pred.multiclass; + if ((priv.state == INIT_TRAIN) && (priv.subsample_timesteps <= -1)) { // active learning + size_t K = cs_get_costs_size(priv.cb_learner, ec.l); + float min_cost = FLT_MAX, min_cost2 = FLT_MAX; + for (size_t k = 0; k < K; k++) { + float cost = cs_get_cost_partial_prediction(priv.cb_learner, ec.l, k); + if (cost < min_cost) { min_cost2 = min_cost; min_cost = cost; } + else if (cost < min_cost2) { min_cost2 = cost; } + } + if (min_cost2 < FLT_MAX) + priv.active_uncertainty.push_back( make_pair(min_cost2 - min_cost, priv.t) ); + } + // in beam search mode, go through alternatives and add them as back-ups if (priv.beam) { float act_cost = 0; @@ -725,6 +740,9 @@ action k_act = cs_get_cost_index(priv.cb_learner, ec.l, k); if (k_act == act) continue; // skip the taken action float delta_cost = cs_get_cost_partial_prediction(priv.cb_learner, ec.l, k) - act_cost + priv.beam_initial_cost; + + if (! priv.beam->might_insert( delta_cost )) continue; + // construct the action prefix action_prefix* px = new v_array<action>; *px = v_init<action>(); px->resize(new_len + 1); @@ -803,6 +821,7 @@ namespace Search { for (size_t k=start_K; k<K; k++) { + if (! priv.beam->might_insert( delta_cost )) continue; action_prefix* px = new v_array<action>; *px = v_init<action>(); px->resize(new_len + 1); @@ -1190,17 +1209,20 @@ namespace Search { // if there's active learning, we need to if (priv.subsample_timesteps <= -1) { - for (size_t t=0; t<priv.ec_seq.size(); t++) { + for (size_t i=0; i<priv.active_uncertainty.size(); i++) + if (frand48() > priv.active_uncertainty[i].first) + timesteps.push_back(pair<size_t,size_t>(0, priv.active_uncertainty[i].second - 1)); + /* float k = (float)priv.total_examples_generated; priv.ec_seq[t]->revert_weight = priv.all->loss->getRevertingWeight(priv.all->sd, priv.ec_seq[t].pred.scalar, priv.all->eta / powf(k, priv.all->power_t)); float importance = query_decision(active_str, *priv.ec_seq[t], k); if (importance > 0.) timesteps.push_back(pair<size_t,size_t>(0,t)); - } + */ } // if there's no subsampling to do, just return [0,T) - else if (priv.subsample_timesteps <= 0) + else + if (priv.subsample_timesteps <= 0) for (size_t t=0; t<priv.T; t++) timesteps.push_back(pair<size_t,size_t>(0,t)); @@ -1412,6 +1434,7 @@ namespace Search { reset_search_structure(priv); priv.state = INIT_TRAIN; + priv.active_uncertainty.erase(); priv.train_trajectory.erase(); // this is where we'll store the training sequence priv.task->run(sch, priv.ec_seq); @@ -1437,6 +1460,7 @@ namespace Search { priv.T = priv.t; if (priv.beam) get_training_timesteps_beam(priv, *final_beam, priv.timesteps); else get_training_timesteps(priv, priv.timesteps); + priv.learn_losses.erase(); size_t last_beam_id = 0; for (size_t tid=0; tid<priv.timesteps.size(); tid++) + priv.active_uncertainty = v_init< pair<float,size_t> >(); priv.cross_validate = false; priv.A = 1; priv.num_learners = 1;
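Much of the beam speedup in the patch above comes from asking the beam whether a candidate could possibly be accepted before allocating and copying an action prefix for it. The check is cheap because the beam already tracks its worst retained element; a minimal sketch of the pattern, using a simplified beam struct (the real beam.h, rewritten as a one-liner in the next patch, also handles equivalence classes and k-best):

    struct beam_sketch {
      size_t count, beam_size;        // current fill and capacity
      float worst_cost, prune_if_gt;  // worst retained cost, hard prune bound
      bool might_insert(float cost) const {
        return (cost <= prune_if_gt) && ((count < beam_size) || (cost < worst_cost));
      }
    };
    // Callers test might_insert(delta_cost) first, so alternatives the beam
    // would reject are skipped before any action-prefix allocation happens.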
From 09ebea2fbc9dc0b7115b6043ef0f2a094c103621 Mon Sep 17 00:00:00 2001 From: Hal Daume III Date: Tue, 17 Feb 2015 15:04:35 -0500 Subject: [PATCH 04/13] code cleanup; fixed memory leak in dictionaries --- vowpalwabbit/beam.h | 8 +------- vowpalwabbit/parse_args.cc | 4 ++++ vowpalwabbit/parse_example.cc | 3 ++- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/vowpalwabbit/beam.h b/vowpalwabbit/beam.h index e5a45c701c2..7f177569e93 100644 --- a/vowpalwabbit/beam.h +++ b/vowpalwabbit/beam.h @@ -91,13 +91,7 @@ inline int compare_on_hash_then_cost(const void *void_a, const void *void_b) { if (beam_size == 1) do_kbest = false; // automatically turn of kbest } - bool might_insert(float cost) { - bool should_add = false; - if (count < beam_size) should_add = true; - else if (cost < worst_cost) should_add = true; - if (cost > prune_if_gt) should_add = false; - return should_add; - } + inline bool might_insert(float cost) { return (cost <= prune_if_gt) && ((count < beam_size) || (cost < worst_cost)); } bool insert(T*data, float cost, uint32_t hash) { // returns TRUE iff element was actually added if (!might_insert(cost)) return false; diff --git a/vowpalwabbit/parse_args.cc b/vowpalwabbit/parse_args.cc index fe90da90240..3a3d984aa7d 100644 --- a/vowpalwabbit/parse_args.cc +++ b/vowpalwabbit/parse_args.cc @@ -120,6 +120,10 @@ void parse_dictionary_argument(vw&all, string str) { *arr = v_init<feature>(); push_many(*arr, ec->atomics[def].begin, ec->atomics[def].size()); map->put(ss, hash, arr); + + // clear up ec + ec->tag.erase(); ec->indices.erase(); + for (size_t i=0; i<256; i++) { ec->atomics[i].erase(); ec->audit_features[i].erase(); } } dealloc_example(all.p->lp.delete_label, *ec); free(ec); diff --git a/vowpalwabbit/parse_example.cc b/vowpalwabbit/parse_example.cc index c03097ffb18..3feb5626344 100644 --- a/vowpalwabbit/parse_example.cc +++ b/vowpalwabbit/parse_example.cc @@ -41,7 +41,8 @@ class TC_parser { example* ae; uint32_t* affix_features; bool* spelling_features; - v_array<char> spelling; + v_array<char> spelling = v_init<char>(); + vector<feature_dict*>* namespace_dictionaries; ~TC_parser(){ }
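The parse_args.cc hunk in the patch above is the dictionary leak fix: the scratch example used to parse each dictionary line is emptied before it is deallocated, so nothing parsed into it outlives the dictionary build. A minimal sketch of that cleanup, as a hypothetical helper mirroring the added lines in parse_dictionary_argument:

    void clear_scratch_example(example* ec) {
      ec->tag.erase();
      ec->indices.erase();
      for (size_t i=0; i<256; i++) {   // one slot per possible namespace byte
        ec->atomics[i].erase();
        ec->audit_features[i].erase();
      }
    }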
From 54f5c0bcc0520b30bd671cd47aaa14c8f1f7fa99 Mon Sep 17 00:00:00 2001 From: Hal Daume III Date: Wed, 18 Feb 2015 16:09:15 -0500 Subject: [PATCH 05/13] speed up python, minor tweak to output on beam --- python/pylibvw.cc | 67 +++++++++++++++++++++++++++++------------- python/pyvw.py | 5 ++-- vowpalwabbit/search.cc | 13 ++++---- 3 files changed, 58 insertions(+), 27 deletions(-) diff --git a/python/pylibvw.cc b/python/pylibvw.cc index 798eb44aae1..2decbe98826 100644 --- a/python/pylibvw.cc +++ b/python/pylibvw.cc @@ -178,21 +178,23 @@ void ex_push_feature_list(example_ptr ec, vw_ptr vw, unsigned char ns, py::list& else { cerr << "warning: malformed feature in list" << endl; continue; } ai = fv[0]; } - - bool got = false; - py::extract<uint32_t> get_int(ai); - if (get_int.check()) { f.weight_index = get_int(); got = true; } - else { + + if (f.x != 0.) { + bool got = false; py::extract<string> get_str(ai); if (get_str.check()) { f.weight_index = VW::hash_feature(*vw, get_str(), ns_hash); got = true; - } else { cerr << "warning: malformed feature in list" << endl; continue; } - } - if (got && (f.x != 0.)) { - ec->atomics[ns].push_back(f); - count++; - sum_sq += f.x * f.x; + } else { + py::extract<uint32_t> get_int(ai); + if (get_int.check()) { f.weight_index = get_int(); got = true; } + else { cerr << "warning: malformed feature in list" << endl; continue; } + } + if (got) { + ec->atomics[ns].push_back(f); + count++; + sum_sq += f.x * f.x; + } } } ec->num_features += count; @@ -200,15 +202,6 @@ void ex_push_feature_list(example_ptr ec, vw_ptr vw, unsigned char ns, py::list& ec->total_sum_feat_sq += sum_sq; } -bool ex_pop_feature(example_ptr ec, unsigned char ns) { - if (ec->atomics[ns].size() == 0) return false; - feature f = ec->atomics[ns].pop(); - ec->num_features--; - ec->sum_feat_sq[ns] -= f.x * f.x; - ec->total_sum_feat_sq -= f.x * f.x; - return true; -} - void ex_push_namespace(example_ptr ec, unsigned char ns) { ec->indices.push_back(ns); } @@ -219,6 +212,39 @@ void ex_ensure_namespace_exists(example_ptr ec, unsigned char ns) { ex_push_namespace(ec, ns); } +void ex_push_dictionary(example_ptr ec, vw_ptr vw, py::dict& dict) { + py::object objectKey, objectVal; + const py::object objectKeys = dict.iterkeys(); + const py::object objectVals = dict.itervalues(); + unsigned long ulCount = boost::python::extract<unsigned long>(dict.attr("__len__")()); + for (size_t u=0; u<ulCount; u++) { + objectKey = objectKeys.attr("next")(); + objectVal = objectVals.attr("next")(); + char chCheckKey = objectKey.ptr()->ob_type->tp_name[0]; + if (chCheckKey != 's') continue; + chCheckKey = objectVal.ptr()->ob_type->tp_name[0]; + if (chCheckKey != 'l') continue; + + py::extract<string> ns_e(objectKey); + if (ns_e().length() < 1) continue; + py::extract<py::list> list_e(objectVal); + py::list list = list_e(); + char ns = ns_e()[0]; + ex_ensure_namespace_exists(ec, ns); + ex_push_feature_list(ec, vw, ns, list); + } +} + +bool ex_pop_feature(example_ptr ec, unsigned char ns) { + if (ec->atomics[ns].size() == 0) return false; + feature f = ec->atomics[ns].pop(); + ec->num_features--; + ec->sum_feat_sq[ns] -= f.x * f.x; + ec->total_sum_feat_sq -= f.x * f.x; + return true; +} + bool ex_pop_namespace(example_ptr ec) { if (ec->indices.size() == 0) return false; unsigned char ns = ec->indices.pop(); @@ -513,6 +539,7 @@ BOOST_PYTHON_MODULE(pylibvw) { .def("push_hashed_feature", &ex_push_feature, "Add a hashed feature to a given namespace (id=character-ord)") .def("push_feature_list", &ex_push_feature_list, "Add a (Python) 
list of features to a given namespace") + .def("push_feature_dict", &ex_push_dictionary, "Add a (Python) dictionary of namespace/feature-list pairs") .def("pop_feature", &ex_pop_feature, "Remove the top feature from a given namespace; returns True iff the list was non-empty") .def("push_namespace", &ex_push_namespace, "Add a new namespace") .def("ensure_namespace_exists", &ex_ensure_namespace_exists, "Add a new namespace if it doesn't already exist") diff --git a/python/pyvw.py b/python/pyvw.py index 7a68a287d18..9205e04993a 100644 --- a/python/pyvw.py +++ b/python/pyvw.py @@ -406,8 +406,9 @@ def __init__(self, vw, initStringOrDict=None, labelType=pylibvw.vw.lDefault): self.stride = vw.get_stride() self.finished = False self.setup_done = False - for ns_char,feats in initStringOrDict.iteritems(): - self.push_features(ns_char, feats) + #for ns_char,feats in initStringOrDict.iteritems(): + # self.push_features(ns_char, feats) + self.push_feature_dict(vw, initStringOrDict) self.setup_example() else: raise TypeError('expecting string or dict as argument for example construction') diff --git a/vowpalwabbit/search.cc b/vowpalwabbit/search.cc index ab638bb6b0f..a6db0539371 100644 --- a/vowpalwabbit/search.cc +++ b/vowpalwabbit/search.cc @@ -1360,15 +1360,18 @@ namespace Search { for (; best != final_beam->end(); ++best) if (best->active) { new_tag.erase(); - new_tag.resize(50, true); - int len = sprintf(new_tag.begin, "%-10.6f\t", best->cost); - new_tag.end = new_tag.begin + len; + if (priv.kbest > 1) { + new_tag.resize(50, true); + int len = sprintf(new_tag.begin, "%-10.6f\t", best->cost); + new_tag.end = new_tag.begin + len; + } push_many(new_tag, priv.ec_seq[0]->tag.begin, priv.ec_seq[0]->tag.size()); for (int* sink = all.final_prediction_sink.begin; sink != all.final_prediction_sink.end; ++sink) all.print_text((int)*sink, best->data->second, new_tag); } - for (int* sink = all.final_prediction_sink.begin; sink != all.final_prediction_sink.end; ++sink) - all.print_text((int)*sink, "", priv.ec_seq[0]->tag); + if (priv.kbest > 1) + for (int* sink = all.final_prediction_sink.begin; sink != all.final_prediction_sink.end; ++sink) + all.print_text((int)*sink, "", priv.ec_seq[0]->tag); new_tag.delete_v(); } From 54ca541ca2ccbc774a05bd7cb2ad877fa61ce836 Mon Sep 17 00:00:00 2001 From: Hal Daume III Date: Wed, 18 Feb 2015 23:43:31 -0500 Subject: [PATCH 06/13] bug fix on learner_id, minor code cleanups --- vowpalwabbit/search.cc | 5 +- vowpalwabbit/search_sequencetask.cc | 136 +++++++++++++++------------- 2 files changed, 75 insertions(+), 66 deletions(-) diff --git a/vowpalwabbit/search.cc b/vowpalwabbit/search.cc index a6db0539371..a34ad426801 100644 --- a/vowpalwabbit/search.cc +++ b/vowpalwabbit/search.cc @@ -950,7 +950,7 @@ namespace Search { else if (losses[i] == min_loss) num_min++; if (losses[i] > max_loss) { max_loss = losses[i]; } } - + int learner = select_learner(priv, priv.current_policy, priv.learn_learner_id, true); if (!priv.is_ldf) { // not LDF @@ -1148,6 +1148,7 @@ namespace Search { a = choose_oracle_action(priv, ec_cnt, oracle_actions, oracle_actions_cnt, allowed_actions, allowed_actions_cnt, priv.beam && (priv.state != INIT_TEST)); if ((policy >= 0) || gte_here) { + priv.learn_learner_id = learner_id; int learner = select_learner(priv, policy, learner_id, false); ensure_size(priv.condition_on_actions, condition_on_cnt); @@ -2121,7 +2122,7 @@ namespace Search { learner& l = init_learner(&sch, base, search_predict_or_learn, search_predict_or_learn, - priv.total_number_of_policies); + 
priv.total_number_of_policies * priv.num_learners); l.set_finish_example(finish_example); l.set_end_examples(end_examples); l.set_finish(search_finish);
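The learner_id fix above matters because each policy now owns num_learners stacked sub-learners, and the sequence task below hands the pass number in as the learner id. A minimal sketch of the mapping, assuming the same stacked layout as select_learner (the real function, shown in a later patch of this series, also handles cross-validation):

    // policy < 0 means the oracle/optimal policy; otherwise a policy's
    // sub-learners are stored contiguously, so learner_id selects an offset.
    int select_sub_learner(int policy, size_t learner_id, size_t num_learners) {
      if (policy < 0) return policy;
      return (int)(policy * num_learners + learner_id);
    }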
diff --git a/vowpalwabbit/search_sequencetask.cc b/vowpalwabbit/search_sequencetask.cc index 8b361aefcb0..712e481f388 100644 --- a/vowpalwabbit/search_sequencetask.cc +++ b/vowpalwabbit/search_sequencetask.cc @@ -87,63 +87,66 @@ namespace SequenceSpanTask { EncodingType encoding; v_array<action> allowed_actions; v_array<action> only_two_allowed; // used for BILOU encoding + size_t multipass; }; void initialize(Search::search& sch, size_t& num_actions, po::variables_map& vm) { - task_data * my_task_data = new task_data(); + task_data * D = new task_data(); po::options_description sspan_opts("search sequencespan options"); sspan_opts.add_options()("search_span_bilou", "switch to (internal) BILOU encoding instead of BIO encoding"); + sspan_opts.add_options()("search_span_multipass", po::value<size_t>(&(D->multipass))->default_value(1), "do multiple passes"); sch.add_program_options(vm, sspan_opts); if (vm.count("search_span_bilou")) { cerr << "switching to BILOU encoding for sequence span labeling" << endl; - my_task_data->encoding = BILOU; + D->encoding = BILOU; num_actions = num_actions * 2 - 1; } else - my_task_data->encoding = BIO; - my_task_data->allowed_actions.erase(); + D->encoding = BIO; + D->allowed_actions.erase(); - if (my_task_data->encoding == BIO) { - my_task_data->allowed_actions.push_back(1); + if (D->encoding == BIO) { + D->allowed_actions.push_back(1); for (action l=2; l<num_actions; l+=2) D->allowed_actions.push_back(l); - my_task_data->allowed_actions.push_back(1); // push back an extra 1 that we can overwrite later if we want - } else if (my_task_data->encoding == BILOU) { - my_task_data->allowed_actions.push_back(1); + D->allowed_actions.push_back(1); // push back an extra 1 that we can overwrite later if we want + } else if (D->encoding == BILOU) { + D->allowed_actions.push_back(1); for (action l=2; l<num_actions; l+=4) D->allowed_actions.push_back(l); - my_task_data->allowed_actions.push_back(l+1); + D->allowed_actions.push_back(l); + D->allowed_actions.push_back(l+1); } - my_task_data->only_two_allowed.push_back(0); - my_task_data->only_two_allowed.push_back(0); + D->only_two_allowed.push_back(0); + D->only_two_allowed.push_back(0); } - sch.set_task_data(my_task_data); + sch.set_task_data(D); sch.set_options( Search::AUTO_CONDITION_FEATURES | // automatically add history features to our examples, please Search::AUTO_HAMMING_LOSS | // please just use hamming loss on individual predictions -- we won't declare loss Search::EXAMPLES_DONT_CHANGE | // we don't do any internal example munging 0); + sch.set_num_learners(D->multipass); } void finish(Search::search& sch) { - task_data * my_task_data = sch.get_task_data<task_data>(); - my_task_data->allowed_actions.delete_v(); - my_task_data->only_two_allowed.delete_v(); - delete my_task_data; + task_data* D = sch.get_task_data<task_data>(); + D->allowed_actions.delete_v(); + D->only_two_allowed.delete_v(); + delete D; } void setup(Search::search& sch, vector<example*>& ec) { - task_data * my_task_data = sch.get_task_data<task_data>(); + task_data& D = *sch.get_task_data<task_data>(); - if (my_task_data->encoding == BILOU) + if (D.encoding == BILOU) convert_bio_to_bilou(ec); } void takedown(Search::search& sch, vector<example*>& ec) { - task_data * my_task_data = sch.get_task_data<task_data>(); + task_data& D = *sch.get_task_data<task_data>(); - if (my_task_data->encoding == BILOU) + if (D.encoding == BILOU) for (size_t n=0; n<ec.size(); n++) { MULTICLASS::multiclass& ylab = ec[n]->l.multi; ylab.label = bilou_to_bio(ylab.label); @@ -151,40 +154,45 @@ namespace SequenceSpanTask { } void run(Search::search& sch, vector<example*>& ec) { - task_data * my_task_data = sch.get_task_data<task_data>(); - action last_prediction = 1; - v_array<action> * y_allowed = &(my_task_data->allowed_actions); - - for (size_t i=0; i<ec.size(); i++) { - action oracle = ec[i]->l.multi.label; - size_t len = y_allowed->size(); - Search::predictor P(sch, (ptag)i+1); - if (my_task_data->encoding == BIO) { - if (last_prediction == 1) P.set_allowed(y_allowed->begin, len-1); - else if (last_prediction % 2 == 0) { (*y_allowed)[len-1] = last_prediction+1; P.set_allowed(*y_allowed); } - else { (*y_allowed)[len-1] = last_prediction; P.set_allowed(*y_allowed); } - if ((oracle > 1) && (oracle % 2 == 1) && (last_prediction != oracle) && (last_prediction != oracle-1)) - oracle = 1; // if we are supposed to I-X, but last wasn't B-X or I-X, then say O - } else if (my_task_data->encoding == BILOU) { - if ((last_prediction == 1) || ((last_prediction-2) % 4 == 0) || ((last_prediction-2) % 4 == 3)) { // O or unit-X or last-X - P.set_allowed(my_task_data->allowed_actions); - // we cannot allow in-X or last-X next - if ((oracle > 1) && (((oracle-2) % 4 == 2) || ((oracle-2) % 4 == 3))) - oracle = 1; - } else { // begin-X or in-X - action other = ((last_prediction-2) % 4 == 1) ? (last_prediction+2) : last_prediction; - P.set_allowed(last_prediction+1); - P.add_allowed(other); - if ((oracle != last_prediction+1) && (oracle != other)) - oracle = other; + task_data& D = *sch.get_task_data<task_data>(); + v_array<action> * y_allowed = &(D.allowed_actions); + + for (size_t pass=1; pass<=D.multipass; pass++) { + action last_prediction = 1; + for (size_t i=0; i<ec.size(); i++) { + action oracle = ec[i]->l.multi.label; + size_t len = y_allowed->size(); + Search::predictor P(sch, (ptag)i+1); + P.set_learner_id(pass-1); + if (D.encoding == BIO) { + if (last_prediction == 1) P.set_allowed(y_allowed->begin, len-1); + else if (last_prediction % 2 == 0) { (*y_allowed)[len-1] = last_prediction+1; P.set_allowed(*y_allowed); } + else { (*y_allowed)[len-1] = last_prediction; P.set_allowed(*y_allowed); } + if ((oracle > 1) && (oracle % 2 == 1) && (last_prediction != oracle) && (last_prediction != oracle-1)) + oracle = 1; // if we are supposed to I-X, but last wasn't B-X or I-X, then say O + } else if (D.encoding == BILOU) { + if ((last_prediction == 1) || ((last_prediction-2) % 4 == 0) || ((last_prediction-2) % 4 == 3)) { // O or unit-X or last-X + P.set_allowed(D.allowed_actions); + // we cannot allow in-X or last-X next + if ((oracle > 1) && (((oracle-2) % 4 == 2) || ((oracle-2) % 4 == 3))) + oracle = 1; + } else { // begin-X or in-X + action other = ((last_prediction-2) % 4 == 1) ? (last_prediction+2) : last_prediction; + P.set_allowed(last_prediction+1); + P.add_allowed(other); + if ((oracle != last_prediction+1) && (oracle != other)) + oracle = other; + } } - } last_prediction = P.set_input(*ec[i]).set_condition_range((ptag)i, sch.get_history_length(), 'p').set_oracle(oracle).predict(); + P.set_input(*ec[i]); + P.set_condition_range((ptag)i, sch.get_history_length(), 'p'); + if (pass > 1) P.add_condition_range((ptag)(i+1+sch.get_history_length()), sch.get_history_length()+1, 'a'); + P.set_oracle(oracle); + last_prediction = P.predict(); - action printed_prediction = (my_task_data->encoding == BIO) ? last_prediction : bilou_to_bio(last_prediction); - - if (sch.output().good()) - sch.output() << printed_prediction << ' '; + if ((pass == D.multipass) && sch.output().good()) + sch.output() << ((D.encoding == BIO) ? 
last_prediction : bilou_to_bio(last_prediction)) << ' '; + } } } } @@ -197,20 +205,20 @@ namespace ArgmaxTask { }; void initialize(Search::search& sch, size_t& num_actions, po::variables_map& vm) { - task_data* my_task_data = new task_data(); + task_data* D = new task_data(); po::options_description argmax_opts("argmax options"); argmax_opts.add_options() - ("cost", po::value(&(my_task_data->false_negative_cost))->default_value(10.0), "False Negative Cost") - ("negative_weight", po::value(&(my_task_data->negative_weight))->default_value(1), "Relative weight of negative examples") + ("cost", po::value(&(D->false_negative_cost))->default_value(10.0), "False Negative Cost") + ("negative_weight", po::value(&(D->negative_weight))->default_value(1), "Relative weight of negative examples") ("max", "Disable structure: just predict the max"); sch.add_program_options(vm, argmax_opts); - my_task_data->predict_max = vm.count("max") > 0; + D->predict_max = vm.count("max") > 0; - sch.set_task_data(my_task_data); + sch.set_task_data(D); - if (my_task_data->predict_max) + if (D->predict_max) sch.set_options( Search::EXAMPLES_DONT_CHANGE ); // we don't do any internal example munging else sch.set_options( Search::AUTO_CONDITION_FEATURES | // automatically add history features to our examples, please @@ -218,7 +226,7 @@ namespace ArgmaxTask { } void run(Search::search& sch, vector& ec) { - task_data * my_task_data = sch.get_task_data(); + task_data& D = *sch.get_task_data(); uint32_t max_prediction = 1; uint32_t max_label = 1; @@ -227,14 +235,14 @@ namespace ArgmaxTask { for (ptag i=0; ipredict_max ? max_label : ec[i]->l.multi.label; + uint32_t oracle = D.predict_max ? max_label : ec[i]->l.multi.label; uint32_t prediction = sch.predict(*ec[i], i+1, &oracle, 1, &i, "p"); max_prediction = max(prediction, max_prediction); } float loss = 0.; if (max_label > max_prediction) - loss = my_task_data->false_negative_cost / my_task_data->negative_weight; + loss = D.false_negative_cost / D.negative_weight; else if (max_prediction > max_label) loss = 1.; sch.loss(loss); From e58e27a3e6f302e2b7434876a0fa2f4f58b79676 Mon Sep 17 00:00:00 2001 From: Hal Daume III Date: Fri, 20 Feb 2015 14:47:39 -0500 Subject: [PATCH 07/13] added ability to unsetup example in python interface --- python/pylibvw.cc | 63 ++++++++++++++++++++++++++++++++++++++++++++--- python/pyvw.py | 40 ++++++++++++++++-------------- 2 files changed, 81 insertions(+), 22 deletions(-) diff --git a/python/pylibvw.cc b/python/pylibvw.cc index 2decbe98826..9b9f490284e 100644 --- a/python/pylibvw.cc +++ b/python/pylibvw.cc @@ -245,13 +245,18 @@ bool ex_pop_feature(example_ptr ec, unsigned char ns) { return true; } -bool ex_pop_namespace(example_ptr ec) { - if (ec->indices.size() == 0) return false; - unsigned char ns = ec->indices.pop(); +void ex_erase_namespace(example_ptr ec, unsigned char ns) { ec->num_features -= ec->atomics[ns].size(); ec->total_sum_feat_sq -= ec->sum_feat_sq[ns]; ec->sum_feat_sq[ns] = 0.; ec->atomics[ns].erase(); + ec->audit_features[ns].erase(); +} + +bool ex_pop_namespace(example_ptr ec) { + if (ec->indices.size() == 0) return false; + unsigned char ns = ec->indices.pop(); + ex_erase_namespace(ec, ns); return true; } @@ -259,6 +264,56 @@ void my_setup_example(vw_ptr vw, example_ptr ec) { VW::setup_example(*vw, ec.get()); } +void unsetup_example(vw_ptr vwP, example_ptr ae) { + vw&all = *vwP; + ae->partial_prediction = 0.; + ae->num_features = 0; + ae->total_sum_feat_sq = 0; + ae->loss = 0.; + + if (all.ignore_some) { + cerr << "error: 
cannot unsetup example when some namespaces are ignored!" << endl; + throw exception(); + } + + if(all.ngram_strings.size() > 0) { + cerr << "error: cannot unsetup example when ngrams are in use!" << endl; + throw exception(); + } + + if (all.add_constant) { + ae->atomics[constant_namespace].erase(); + ae->audit_features[constant_namespace].erase(); + int hit_constant = -1; + size_t N = ae->indices.size(); + for (size_t i=0; i<N; i++) { + size_t j = N-1-i; + if (ae->indices[j] == constant_namespace) { + if (hit_constant >= 0) { cerr << "error: hit constant namespace twice!" << endl; throw exception(); } + hit_constant = j; + break; + } + } + if (hit_constant >= 0) { + for (size_t i=hit_constant; i<N-1; i++) + ae->indices[i] = ae->indices[i+1]; + ae->indices.pop(); + } + } + + uint32_t multiplier = all.wpp << all.reg.stride_shift; + if(multiplier != 1) { //make room for per-feature information. + for (unsigned char* i = ae->indices.begin; i != ae->indices.end; i++) + for(feature* j = ae->atomics[*i].begin; j != ae->atomics[*i].end; j++) + j->weight_index /= multiplier; + if (all.audit || all.hash_inv) + for (unsigned char* i = ae->indices.begin; i != ae->indices.end; i++) + for(audit_data* j = ae->audit_features[*i].begin; j != ae->audit_features[*i].end; j++) + j->weight_index /= multiplier; + } +} + + void ex_set_label_string(example_ptr ec, vw_ptr vw, string label, size_t labelType) { // SPEEDUP: if it's already set properly, don't modify label_parser& old_lp = vw->p->lp; @@ -492,6 +547,7 @@ BOOST_PYTHON_MODULE(pylibvw) { .def("hash_feature", &VW::hash_feature, "given a feature string (arg2) and a hashed namespace (arg3), hash that feature") .def("finish_example", &my_finish_example, "tell VW that you're done with a given example") .def("setup_example", &my_setup_example, "given an example that you've created by hand, prepare it for learning (eg, compute quadratic feature)") + .def("unsetup_example", &unsetup_example, "reverse the process of setup, so that you can go back and modify this example") .def("num_weights", &VW::num_weights, "how many weights are we learning?") .def("get_weight", &VW::get_weight, "get the weight for a particular index") @@ -544,6 +600,7 @@ BOOST_PYTHON_MODULE(pylibvw) { .def("push_namespace", &ex_push_namespace, "Add a new namespace") .def("ensure_namespace_exists", &ex_ensure_namespace_exists, "Add a new namespace if it doesn't already exist") .def("pop_namespace", &ex_pop_namespace, "Remove the top namespace off; returns True iff the list was non-empty") + .def("erase_namespace", &ex_erase_namespace, "Remove all the features from a given namespace") .def("set_label_string", &ex_set_label_string, "(Re)assign the label of this example to this string")
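unsetup_example works because setup_example's transforms are invertible as long as no information was destroyed: the constant namespace can be popped back off, and the per-feature stride can be divided back out. A minimal sketch of the index round-trip, assuming the same multiplier the code above uses (wpp << stride_shift; hypothetical helpers, not VW's API):

    uint32_t setup_index(uint32_t raw, uint32_t wpp, uint32_t stride_shift) {
      return raw * (wpp << stride_shift);    // what setup does to weight_index
    }
    uint32_t unsetup_index(uint32_t cooked, uint32_t wpp, uint32_t stride_shift) {
      return cooked / (wpp << stride_shift); // exact inverse, as in unsetup_example
    }
    // This is also why ngrams and ignored namespaces make unsetup refuse to
    // run: those transforms drop features, so there is nothing to invert.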
diff --git a/python/pyvw.py b/python/pyvw.py index 9205e04993a..1be51101f5a 100644 --- a/python/pyvw.py +++ b/python/pyvw.py @@ -265,7 +265,7 @@ def push_feature(self, feature, v=1.): def pop_feature(self): """Remove the top feature from the current namespace; returns True if a feature was removed, returns False if there were no - features to pop. Fails if setup has run.""" + features to pop.""" return self.ex.pop_feature(self.ns) def push_features(self, ns, featureList): @@ -406,10 +406,8 @@ def __init__(self, vw, initStringOrDict=None, labelType=pylibvw.vw.lDefault): self.stride = vw.get_stride() self.finished = False self.setup_done = False - #for ns_char,feats in initStringOrDict.iteritems(): - # self.push_features(ns_char, feats) self.push_feature_dict(vw, initStringOrDict) - self.setup_example() + #self.setup_example() else: raise TypeError('expecting string or dict as argument for example construction') @@ -469,6 +467,13 @@ def setup_example(self): self.vw.setup_example(self) self.setup_done = True + def unsetup_example(self): + """If this example has been setup, reverse that process so you can continue editing the examples.""" + if not self.setup_done: + raise Exception('trying to unsetup_example that has not yet been setup') + self.vw.unsetup_example(self) + self.setup_done = False + def learn(self): """Learn on this example (and before learning, automatically call setup_example if the example hasn't yet been setup).""" @@ -502,42 +507,40 @@ def get_feature_id(self, ns, feature, ns_hash=None): def push_hashed_feature(self, ns, f, v=1.): - """Add a hashed feature to a given namespace (fails if setup - has already run on this example). Fails if setup has run.""" - if self.setup_done: raise Exception("error: modification to example after setup") + """Add a hashed feature to a given namespace.""" + if self.setup_done: self.unsetup_example(); pylibvw.example.push_hashed_feature(self, self.get_ns(ns).ord_ns, f, v) def push_feature(self, ns, feature, v=1., ns_hash=None): - """Add an unhashed feature to a given namespace (fails if - setup has already run on this example).""" + """Add an unhashed feature to a given namespace.""" f = self.get_feature_id(ns, feature, ns_hash) self.push_hashed_feature(ns, f, v) def pop_feature(self, ns): """Remove the top feature from a given namespace; returns True if a feature was removed, returns False if there were no - features to pop. Fails if setup has run.""" - if self.setup_done: raise Exception("error: modification to example after setup") + features to pop.""" + if self.setup_done: self.unsetup_example(); return pylibvw.example.pop_feature(self, self.get_ns(ns).ord_ns) def push_namespace(self, ns): """Push a new namespace onto this example. You should only do this if you're sure that this example doesn't already have the - given namespace. Fails if setup has run.""" - if self.setup_done: raise Exception("error: modification to example after setup") + given namespace.""" + if self.setup_done: self.unsetup_example(); pylibvw.example.push_namespace(self, self.get_ns(ns).ord_ns) def pop_namespace(self): """Remove the top namespace from an example; returns True if a namespace was removed, or False if there were no namespaces - left. Fails if setup has run.""" - if self.setup_done: raise Exception("error: modification to example after setup") + left.""" + if self.setup_done: self.unsetup_example(); return pylibvw.example.pop_namespace(self) def ensure_namespace_exists(self, ns): """Check to see if a namespace already exists. If it does, do nothing. 
If it doesn't, add it.""" + if self.setup_done: self.unsetup_example(); return pylibvw.example.ensure_namespace_exists(self, self.get_ns(ns).ord_ns) def push_features(self, ns, featureList): @@ -553,8 +556,7 @@ def push_features(self, ns, featureList): space_hash = vw.hash_space( 'x' ) feat_hash = vw.hash_feature( 'a', space_hash ) ex.push_features('x', [feat_hash]) # note: 'x' should match the space_hash! - - Fails if setup has run.""" + """ ns = self.get_ns(ns) self.ensure_namespace_exists(ns) self.push_feature_list(self.vw, ns.ord_ns, featureList) # much faster just to do it in C++ From e31868d2a39d82960fca3a650d3b1034f693e921 Mon Sep 17 00:00:00 2001 From: Hal Daume III Date: Fri, 20 Feb 2015 14:47:58 -0500 Subject: [PATCH 08/13] added test of example modification in python --- python/test_partial_example.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 python/test_partial_example.py diff --git a/python/test_partial_example.py b/python/test_partial_example.py new file mode 100644 index 00000000000..f267cd84f01 --- /dev/null +++ b/python/test_partial_example.py @@ -0,0 +1,15 @@ +import pyvw + +vw = pyvw.vw('--audit') +full = vw.example( { 'a': ['b'], 'x': ['y'] } ) +full.learn() + +part = vw.example( {'a': ['b'] } ) +part.learn() + +part.push_features('x', ['y']) +part.learn() + +part.erase_namespace(ord('x')) +part.push_features('x', ['z']) +part.learn() From 7ce724917ee498797511965f2c078b8eb4213da1 Mon Sep 17 00:00:00 2001 From: Hal Daume III Date: Sat, 28 Feb 2015 10:45:18 -0500 Subject: [PATCH 09/13] add xv to search, separate learners to search_graph --- vowpalwabbit/search.cc | 75 +++++++++++++++++++++--------------- vowpalwabbit/search_graph.cc | 13 +++++-- 2 files changed, 53 insertions(+), 35 deletions(-) diff --git a/vowpalwabbit/search.cc b/vowpalwabbit/search.cc index a34ad426801..14035991b57 100644 --- a/vowpalwabbit/search.cc +++ b/vowpalwabbit/search.cc @@ -134,7 +134,7 @@ namespace Search { RollMethod rollout_method; // 0=policy, 1=oracle, 2=mix_per_state, 3=mix_per_roll RollMethod rollin_method; float subsample_timesteps; // train at every time step or just a (random) subset? - bool cross_validate; // train two separate policies -- TODO how should we deal with this at test time? really we want three but that's hard to implement ;) + bool xv; // train three separate policies -- two for providing examples to the other and a third training on the union (which will be used at test time -- TODO) bool allow_current_policy; // should the current policy be used for training? true for dagger bool adaptive_beta; // used to implement dagger-like algorithms. 
if true, beta = 1-(1-alpha)^n after n updates, and policy is mixed with oracle as \pi' = (1-beta)\pi^* + beta \pi @@ -235,12 +235,12 @@ namespace Search { // for two-fold cross validation, we double the number of learners // and send examples to one or the other depending on the xor of // (is_training) and (example_id % 2) - int select_learner(search_private& priv, int policy, size_t learner_id, bool is_training) { + int select_learner(search_private& priv, int policy, size_t learner_id, bool is_gte, bool global_xv_train) { if (policy<0) return policy; // optimal policy else { int p = (int) (policy*priv.num_learners+learner_id); - if (priv.cross_validate) - p = 2*p + ( is_training ^ (priv.all->sd->example_number % 2) ); + if (priv.xv && !global_xv_train) + p = 2*p + 1 + ( is_gte ^ (priv.all->sd->example_number % 2) ); return p; } } @@ -950,8 +950,6 @@ namespace Search { else if (losses[i] == min_loss) num_min++; if (losses[i] > max_loss) { max_loss = losses[i]; } } - - int learner = select_learner(priv, priv.current_policy, priv.learn_learner_id, true); if (!priv.is_ldf) { // not LDF // since we're not LDF, it should be the case that ec_ref_cnt == 1 @@ -971,39 +969,54 @@ namespace Search { example& ec = priv.learn_ec_ref[0]; polylabel old_label = ec.l; ec.l = labels; - ec.in_use = true; if (add_conditioning) add_example_conditioning(priv, ec, priv.learn_condition_on.begin, priv.learn_condition_on.size(), priv.learn_condition_on_names.begin, priv.learn_condition_on_act.begin); - priv.base_learner->learn(ec, learner); + for (size_t is_global_train=0; is_global_train<=priv.xv; is_global_train++) { + int learner = select_learner(priv, priv.current_policy, priv.learn_learner_id, true, is_global_train); + ec.in_use = true; + priv.base_learner->learn(ec, learner); + } if (add_conditioning) del_example_conditioning(priv, ec); ec.l = old_label; priv.total_examples_generated++; } else { // is LDF assert(losses.size() == priv.learn_ec_ref_cnt); size_t start_K = (priv.is_ldf && LabelDict::ec_is_example_header(priv.learn_ec_ref[0])) ? 1 : 0; - for (action a= (uint32_t)start_K; alearn(*priv.empty_example, learner); - cdbg << "generate_training_example called learn on empty_example" << endl; - for (action a= (uint32_t)start_K; a= 0) || gte_here) { priv.learn_learner_id = learner_id; - int learner = select_learner(priv, policy, learner_id, false); + int learner = select_learner(priv, policy, learner_id, false, priv.state == INIT_TEST); ensure_size(priv.condition_on_actions, condition_on_cnt); for (size_t i=0; i >(); - priv.cross_validate = false; + priv.xv = false; priv.A = 1; priv.num_learners = 1; priv.cb_learner = false; @@ -1914,7 +1927,7 @@ namespace Search { ("search_no_caching", "turn off the built-in caching ability (makes things slower, but technically more safe)") ("search_beam", po::value(), "use beam search (arg = beam size, default 0 = no beam)") ("search_kbest", po::value(), "size of k-best list to produce (must be <= beam size)") - ("search_crossvalidate", "train two separate policies, alternating prediction/learning") + ("search_xv", "train two separate policies, alternating prediction/learning") ; add_options(all); po::variables_map& vm = all.vm; @@ -1946,7 +1959,7 @@ namespace Search { "warning: specified --search_interpolation different than the one loaded from regressor. 
using loaded value of: ", ""); if (vm.count("search_passes_per_policy")) priv.passes_per_policy = vm["search_passes_per_policy"].as<size_t>(); - if (vm.count("search_crossvalidate")) priv.cross_validate = true; + if (vm.count("search_xv")) priv.xv = true; if (vm.count("search_alpha")) priv.alpha = vm["search_alpha" ].as<float>(); if (vm.count("search_beta")) priv.beta = vm["search_beta" ].as<float>(); diff --git a/vowpalwabbit/search_graph.cc b/vowpalwabbit/search_graph.cc index cd22cc72323..4b536ef24f7 100644 --- a/vowpalwabbit/search_graph.cc +++ b/vowpalwabbit/search_graph.cc @@ -27,8 +27,7 @@ label:weight |n features ... they are *implicitly* labeled starting at 1. (note the namespace -needn't be called n.) if weight is -omitted it is assumed to be 1.0. +needn't be called n.) if weight is omitted it is assumed to be 1.0. edge lines look like: @@ -52,6 +51,7 @@ namespace GraphTask { size_t num_loops; size_t K; // number of labels, *NOT* including the +1 for 'unlabeled' bool use_structure; + bool separate_learners; // for adding new features size_t mask; // all->reg.weight_mask @@ -75,15 +75,21 @@ namespace GraphTask { po::options_description sspan_opts("search graphtask options"); sspan_opts.add_options()("search_graph_num_loops", po::value<size_t>(), "how many loops to run [def: 2]"); sspan_opts.add_options()("search_graph_no_structure", "turn off edge features"); + sspan_opts.add_options()("search_graph_separate_learners", "use a different learner for each pass"); sch.add_program_options(vm, sspan_opts); D->num_loops = 2; D->use_structure = true; if (vm.count("search_graph_num_loops")) D->num_loops = vm["search_graph_num_loops"].as<size_t>(); if (vm.count("search_graph_no_structure")) D->use_structure = false; + if (vm.count("search_graph_separate_learners")) D->separate_learners = true; + if (D->num_loops <= 1) { D->num_loops = 1; D->separate_learners = false; } + D->K = num_actions; D->neighbor_predictions = calloc_or_die<float>(D->K+1); + + if (D->separate_learners) sch.set_num_learners(D->num_loops); sch.set_task_data(D); sch.set_options( Search::AUTO_HAMMING_LOSS ); @@ -186,7 +192,6 @@ namespace GraphTask { for (size_t k=0; k<=D.K; k++) { if (D.neighbor_predictions[k] == 0.) continue; feature f = { fv * D.neighbor_predictions[k], (uint32_t) ((( ((fx & D.mask) >> D.ss) + 348919043 * k ) << D.ss) & D.mask) }; - //cerr << "e: " << fx << " (:= " << ((fx & D.mask) >> D.ss) << ") / " << k << " -> " << f.weight_index << ", w=" << D.weight_vector[f.weight_index] << endl; node->atomics[neighbor_namespace].push_back(f); node->sum_feat_sq[neighbor_namespace] += f.x * f.x; } @@ -221,7 +226,6 @@ namespace GraphTask { if (pred_total == 0.) continue; - cerr << "NAN prediction in example " << sd->example_number + 1 << ", forcing 0.0" << endl; - return 0.; + float ret = 0.; + if (ret > sd->max_label) ret = (float)sd->max_label; + if (ret < sd->min_label) ret = (float)sd->min_label; + cerr << "NAN prediction in example " << sd->example_number + 1 << ", forcing " << ret << endl; + return ret; } if ( ret > sd->max_label ) return (float)sd->max_label; diff --git a/vowpalwabbit/loss_functions.cc b/vowpalwabbit/loss_functions.cc index ac924590b61..37c10321f99 100644 --- a/vowpalwabbit/loss_functions.cc +++ b/vowpalwabbit/loss_functions.cc @@ -6,7 +6,6 @@ license as described in the file LICENSE. #include #include #include -#include #include using namespace std;
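The NAN-handling change above stops forcing a bad prediction to a flat 0.0, which could itself lie outside the model's observed label range; the forced value is now clamped into [min_label, max_label] first. A minimal standalone sketch of the resulting behavior, assuming min_label/max_label track the same quantities as sd->min_label and sd->max_label in the hunk:

    #include <cmath>
    float finalize(float ret, double min_label, double max_label) {
      if (std::isnan(ret)) {
        ret = 0.f;                                    // forced fallback...
        if (ret > max_label) ret = (float)max_label;  // ...clamped into range
        if (ret < min_label) ret = (float)min_label;
      }
      if (ret > max_label) return (float)max_label;
      if (ret < min_label) return (float)min_label;
      return ret;
    }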
From 1272f8babc9deecdcbe773aef7ea6d1a0305c9f5 Mon Sep 17 00:00:00 2001 From: Hal Daume III Date: Mon, 2 Mar 2015 14:27:24 -0500 Subject: [PATCH 13/13] fixed clang error --- vowpalwabbit/parse_example.cc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/vowpalwabbit/parse_example.cc b/vowpalwabbit/parse_example.cc index 2f1e833933c..45bfe13da91 100644 --- a/vowpalwabbit/parse_example.cc +++ b/vowpalwabbit/parse_example.cc @@ -43,10 +43,10 @@ class TC_parser { example* ae; uint32_t* affix_features; bool* spelling_features; - v_array<char> spelling = v_init<char>(); + v_array<char> spelling; vector<feature_dict*>* namespace_dictionaries; - + ~TC_parser(){ } inline float featureValue(){ @@ -303,6 +303,7 @@ class TC_parser { } TC_parser(char* reading_head, char* endLine, vw& all, example* ae){ + spelling = v_init<char>(); if (endLine != reading_head) { this->beginLine = reading_head;