Skip to content
Permalink
Browse files

Optional exception (#1906)

* Add strict_parse flag

* Thread new flag value through parser initialization

* Call THROW in parserWarning if strict_parse set

* Add dataset that exercises malformed example branch

* Throw non-VW exception; vw_exception exits are graceful?

* Add exception_ptr to parser and use to rethrow

* Add unit tests

- negative test for single-threaded mode
- negative test for regular mode
- positive test for regular mode

The two negative tests are to make sure that any parser threading
still interacts with exception_ptr correctly

* Create and use a negative-test.sh wrapper script

* Daemon test flake?

* C# test flake?

* Try adding negative-test.sh tests to Windows blacklist

* Add strict_parse_exception and use in place of domain_error

* fixup! Try adding negative-test.sh tests to Windows blacklist

* disable test case in windows

* fix nonwindows tests

* Update to vw_exception (#1907)

* Change to vw_exception and bubble up

* fix negate, change file to just name, put back rethrow

* undo change

* Fix negation (#1909)
  • Loading branch information...
JohnLangford committed Jun 5, 2019
1 parent a4475d5 commit c9be7015dac81245d0c18a77c56ea60d50e4eddc
@@ -17,13 +17,14 @@ var lines = File.ReadAllLines(Path.Combine(testRoot, "RunTests"))

var skipList = new[] { 13, 32, 39, 40, 41, 59, 60, 61, 66, 68, 90,
25, 26, // crash
92, 95, 96, 98, 91, 99, 118, 119, 120,
92, 95, 96, 98, 91, 99, 118, 119, 120,
176, 177, //depend on shell scripts for input/output
14, 16, 17, 31, 33, 34,53, 101, 102, 103, 105, 106, 111, 112, // float delta
71, // --examples to test parser
143, 144, 146, 158, 189, // native json parsing
149, 152, 156, // bash script
188 // possibly float delta
143, 144, 146, 158, 189, // native json parsing
149, 152, 156, 193, 194, // bash script
188, // possibly float delta
195 //--onethread is a shell option, not available via library
};

var outputModels = new Dictionary<string, TestCase>();
@@ -113,8 +114,8 @@ namespace cs_unittest
[TestClass]
public partial class RunTests : TestBase
{
<# foreach (var mainTestcase in testcases.Values) {
if (mainTestcase.Id == 0)
<# foreach (var mainTestcase in testcases.Values) {
if (mainTestcase.Id == 0)
continue;
#>
[TestMethod]
@@ -1719,12 +1719,24 @@ printf '3 |f a b c |e x y z\n2 |f a y c |e x\n' | {VW} --oaa 3 -q ef --audit
test-sets/ref/ftrl_coin.stderr
pred-sets/ref/ftrl_coin.predict
# Test 193: online contextual memory tree
# Test 193: malformed examples, onethread, strict_parse failure
./negative-test.sh {VW} -d train-sets/malformed.dat --onethread --strict_parse
train-sets/ref/malformed-onethread-strict_parse.stderr
# Test 194: malformed examples, strict_parse failure
./negative-test.sh {VW} -d train-sets/malformed.dat --strict_parse
train-sets/ref/malformed-strict_parse.stderr
# Test 195: malformed examples success
{VW} -d train-sets/malformed.dat --onethread
train-sets/ref/malformed.stderr
# Test 196: online contextual memory tree
{VW} -d train-sets/rcv1_smaller.dat --memory_tree 10 --learn_at_leaf 1 --max_number_of_labels 2 --dream_at_update 0 --dream_repeats 3 --oas 0 --online 1 --leaf_example_multiplier 10 --alpha 0.1 -l 0.001 -b 15 --passes 1 --loss_function squared --holdout_off
train-sets/ref/cmt_rcv1_smaller_online.stderr
# Test 194: offline contextual memory tree
{VW} -d train-sets/rcv1_smaller.dat --memory_tree 10 --learn_at_leaf 1 --max_number_of_labels 2 --dream_at_update 0 --dream_repeats 3 --oas 0 --online 0 --leaf_example_multiplier 10 --alpha 0.1 -l 0.001 -b 15 -c --passes 2 --loss_function squared --holdout_off
# Test 197: offline contextual memory tree
{VW} -d train-sets/rcv1_smaller.dat --memory_tree 10 --learn_at_leaf 1 --max_number_of_labels 2 --dream_at_update 0 --dream_repeats 3 --oas 0 --online 0 --leaf_example_multiplier 10 --alpha 0.1 -l 0.001 -b 15 -c --passes 2 --loss_function squared --holdout_off
train-sets/ref/cmt_rcv1_smaller_offline.stderr
# Do not delete this line or the empty line above it
@@ -0,0 +1,9 @@
#!/usr/bin/env bash

# Negative-test wrapper: runs the command given as this script's arguments and
# inverts its result. Exits 0 when the command fails (non-zero status) and
# exits 1 when the command succeeds.
#
# "$@" is quoted so every argument reaches the command exactly as it was
# passed in; an unquoted $@ would be word-split and glob-expanded again,
# breaking arguments that contain spaces or wildcard characters.

if "$@"; then
  exit 1
else
  exit 0
fi
Binary file not shown.
@@ -0,0 +1,13 @@
Num weight bits = 18
learning rate = 0.5
initial_t = 0
power_t = 0.5
using no cache
Reading datafile = train-sets/malformed.dat
num sources = 1
average since example example current current current
loss last counter weight label predict features
vw example #0(parse_example.cc:85): malformed example! '|',space, or EOL expected after : "| x:0.7"in Example #0: "| x:0.7"

vw (parse_example.cc:85): malformed example! '|',space, or EOL expected after : "| x:0.7"in Example #0: "| x:0.7"

@@ -0,0 +1,13 @@
Num weight bits = 18
learning rate = 0.5
initial_t = 0
power_t = 0.5
using no cache
Reading datafile = train-sets/malformed.dat
num sources = 1
average since example example current current current
loss last counter weight label predict features
vw example #0(parse_example.cc:85): malformed example! '|',space, or EOL expected after : "| x:0.7"in Example #0: "| x:0.7"

vw (parse_example.cc:85): malformed example! '|',space, or EOL expected after : "| x:0.7"in Example #0: "| x:0.7"

@@ -0,0 +1,34 @@
Num weight bits = 18
learning rate = 0.5
initial_t = 0
power_t = 0.5
using no cache
Reading datafile = train-sets/malformed.dat
num sources = 1
average since example example current current current
loss last counter weight label predict features
malformed example! '|',space, or EOL expected after : "| x:0.7"in Example #0: "| x:0.7"
malformed example! '|' or EOL expected after : "| x:0.7"in Example #0: "| x:0.7"
1.000000 1.000000 1 1.0 1.0000 0.0000 2
0.565537 0.131074 2 2.0 0.0000 0.3620 2
malformed example! '|',space, or EOL expected after : "| x:0.7"in Example #2: "| x:0.7"
malformed example! '|' or EOL expected after : "| x:0.7"in Example #2: "| x:0.7"
0.438417 0.311297 4 4.0 0.0000 0.4457 2
malformed example! '|',space, or EOL expected after : "| x:0.7"in Example #4: "| x:0.7"
malformed example! '|' or EOL expected after : "| x:0.7"in Example #4: "| x:0.7"
malformed example! '|',space, or EOL expected after : "| x:0.7"in Example #6: "| x:0.7"
malformed example! '|' or EOL expected after : "| x:0.7"in Example #6: "| x:0.7"
0.332172 0.225927 8 8.0 0.0000 0.4680 2
malformed example! '|',space, or EOL expected after : "| x:0.7"in Example #8: "| x:0.7"
malformed example! '|' or EOL expected after : "| x:0.7"in Example #8: "| x:0.7"
malformed example! '|',space, or EOL expected after : "| x:0.7"in Example #10: "| x:0.7"
malformed example! '|' or EOL expected after : "| x:0.7"in Example #10: "| x:0.7"

finished run
number of examples = 11
weighted example sum = 11.000000
weighted label sum = 6.000000
average loss = 0.290115
best constant = 0.545455
best constant's loss = 0.247934
total feature number = 22
@@ -1356,8 +1356,9 @@ LEARNER::base_learner *lda_setup(options_i &options, vw &all)
size_t minibatch2 = next_pow2(ld->minibatch);
if(minibatch2 > all.p->ring_size)
{
bool previous_strict_parse = all.p->strict_parse;
delete all.p;
all.p = new parser{minibatch2};
all.p = new parser{minibatch2, previous_strict_parse};
}

ld->v.resize(all.lda * ld->minibatch);
@@ -131,13 +131,19 @@ int main(int argc, char* argv[])

for (vw* v : alls)
{
if (v->p->exc_ptr)
{
std::rethrow_exception(v->p->exc_ptr);
}

VW::sync_stats(*v);
VW::finish(*v);
}
}
catch (VW::vw_exception& e)
{
cerr << "vw (" << e.Filename() << ":" << e.LineNumber() << "): " << e.what() << endl;
exit(1);
}
catch (exception& e)
{
@@ -1303,12 +1303,14 @@ vw& parse_args(options_i& options, trace_message_t trace_listener, void* trace_c
{
time(&all.init_time);

bool strict_parse = false;
size_t ring_size;
option_group_definition vw_args("VW options");
vw_args.add(make_option("ring_size", ring_size).default_value(256).help("size of example ring"));
vw_args.add(make_option("ring_size", ring_size).default_value(256).help("size of example ring"))
.add(make_option("strict_parse", strict_parse).help("throw on malformed examples"));
options.add_and_parse(vw_args);

all.p = new parser{ring_size};
all.p = new parser{ring_size, strict_parse};

option_group_definition update_args("Update options");
update_args.add(make_option("learning_rate", all.eta).help("Set learning rate").short_name("l"))
@@ -50,10 +50,16 @@ inline void parse_dispatch(vw& all, dispatch_fptr dispatch)
{
std::cerr << "vw example #" << example_number << "(" << e.Filename() << ":" << e.LineNumber() << "): " << e.what()
<< std::endl;

// Stash the exception so it can be thrown on the main thread.
all.p->exc_ptr = std::current_exception();
}
catch (std::exception& e)
{
std::cerr << "vw: example #" << example_number << e.what() << std::endl;

// Stash the exception so it can be thrown on the main thread.
all.p->exc_ptr = std::current_exception();
}
lock_done(*all.p);
examples.delete_v();
@@ -77,9 +77,15 @@ class TC_parser

// Reports a malformed-example diagnostic. The text is made of `message`, the
// characters in [begin, pos), `message2`, the parser's end_parsed_examples
// counter and the current input line (beginLine..endLine).
inline void parserWarning(const char* message, char* begin, char* pos, const char* message2)
{
cerr << message << std::string(begin, pos - begin).c_str() << message2 << "in Example #"
<< this->p->end_parsed_examples << ": \"" << std::string(this->beginLine, this->endLine).c_str() << "\""
<< endl;
// Assemble the same text into a string so it can be carried by an exception.
std::stringstream ss;
ss << message << std::string(begin, pos - begin).c_str() << message2 << "in Example #"
<< this->p->end_parsed_examples << ": \"" << std::string(this->beginLine, this->endLine).c_str() << "\""
<< endl;
// When the parser's strict_parse flag is set the diagnostic is thrown as a
// VW::strict_parse_exception; otherwise it is written to cerr.
if (p->strict_parse) {
THROW_EX(VW::strict_parse_exception, ss.str());
} else {
cerr << ss.str();
}
}

inline float featureValue()
@@ -36,8 +36,8 @@ struct example_initializer

struct parser
{
parser(size_t ring_size)
: example_pool{ring_size}, ready_parsed_examples{ring_size}, ring_size{ring_size}
parser(size_t ring_size, bool strict_parse_)
: example_pool{ring_size}, ready_parsed_examples{ring_size}, ring_size{ring_size}, strict_parse{strict_parse_}
{
this->input = new io_buf{};
this->output = new io_buf{};
@@ -103,6 +103,9 @@ struct parser
bool audit = false;
bool decision_service_json = false;
std::shared_ptr<void> jsonp;

bool strict_parse;
std::exception_ptr exc_ptr;
};

void enable_sources(vw& all, bool quiet, size_t passes, input_options& input_options);
@@ -14,6 +14,14 @@ license as described in the file LICENSE.
#define _NOEXCEPT throw()
#endif

#include <string.h>

// __FILENAME__ expands to the part of __FILE__ that follows the last path
// separator ('\\' when _WIN32 is defined, '/' otherwise), or to __FILE__
// itself when it contains no separator.
#ifdef _WIN32
#define __FILENAME__ (strrchr(__FILE__, '\\') ? strrchr(__FILE__, '\\') + 1 : __FILE__)
#else
#define __FILENAME__ (strrchr(__FILE__, '/') ? strrchr(__FILE__, '/') + 1 : __FILE__)
#endif

namespace VW
{
class vw_exception : public std::exception
@@ -105,6 +113,28 @@ class vw_unrecognised_option_exception : public vw_exception
~vw_unrecognised_option_exception() _NOEXCEPT {}
};

// Exception type used to report a malformed example when strict parsing is
// enabled. It forwards the file name, line number and message to vw_exception
// and adds no state of its own.
class strict_parse_exception : public vw_exception
{
 public:
  // Forwards the throw site and message text to the vw_exception base.
  strict_parse_exception(const char* file, int lineNumber, std::string message)
      : vw_exception(file, lineNumber, message)
  {
  }

  // Copying delegates entirely to the base class.
  strict_parse_exception(const strict_parse_exception& other) : vw_exception(other) {}

  strict_parse_exception& operator=(const strict_parse_exception& other)
  {
    // Self-assignment is a no-op; otherwise let the base copy its payload.
    if (this != &other)
    {
      vw_exception::operator=(other);
    }
    return *this;
  }

  ~strict_parse_exception() _NOEXCEPT {}
};

#ifdef _WIN32
void vw_trace(const char* filename, int linenumber, const char* fmt, ...);

@@ -136,7 +166,7 @@ bool launchDebugger();
__msg << ", errno = unknown"; \
else \
__msg << ", errno = " << __errmsg; \
throw VW::vw_exception(__FILE__, __LINE__, __msg.str()); \
throw VW::vw_exception(__FILENAME__, __LINE__, __msg.str()); \
}
#else
#define THROWERRNO(args) \
@@ -148,7 +178,7 @@ bool launchDebugger();
__msg << "errno = unknown"; \
else \
__msg << "errno = " << __errmsg; \
throw VW::vw_exception(__FILE__, __LINE__, __msg.str()); \
throw VW::vw_exception(__FILENAME__, __LINE__, __msg.str()); \
}
#endif

@@ -157,14 +187,14 @@ bool launchDebugger();
{ \
std::stringstream __msg; \
__msg << args; \
throw VW::vw_exception(__FILE__, __LINE__, __msg.str()); \
throw VW::vw_exception(__FILENAME__, __LINE__, __msg.str()); \
}

// THROW_EX(ex, args): streams `args` into a std::stringstream and throws an
// exception of type `ex`, constructed from the source file name, the current
// line number and the resulting message string.
#define THROW_EX(ex, args) \
{ \
std::stringstream __msg; \
__msg << args; \
throw ex(__FILE__, __LINE__, __msg.str()); \
throw ex(__FILENAME__, __LINE__, __msg.str()); \
}

} // namespace VW

0 comments on commit c9be701

Please sign in to comment.
You can’t perform that action at this time.