Skip to content

Commit

Permalink
Merge pull request #38 from haberman/decoderfix2
Browse files Browse the repository at this point in the history
Added lots of decoder tests and fixed lots of bugs.
  • Loading branch information
haberman committed Aug 19, 2015
2 parents 782670e + 1e87095 commit 77d45ed
Show file tree
Hide file tree
Showing 7 changed files with 191 additions and 61 deletions.
6 changes: 2 additions & 4 deletions tests/json/test_json.cc
Original file line number Diff line number Diff line change
Expand Up @@ -295,17 +295,15 @@ void test_json_roundtrip_message(const char* json_src,
upb::json::Parser* parser =
upb::json::Parser::Create(env.env(), printer->input());
env.ResetBytesSink(parser->input());
env.Reset(json_src, strlen(json_src), false);
env.Reset(json_src, strlen(json_src), false, false);

bool ok = env.Start() &&
env.ParseBuffer(seam) &&
env.ParseBuffer(-1) &&
env.End();

if (!ok) {
fprintf(stderr, "upb parse error: %s\n", env.status().error_message());
}
ASSERT(ok);
ASSERT(env.CheckConsistency());

if (memcmp(json_expected,
data_sink.Data().data(),
Expand Down
107 changes: 82 additions & 25 deletions tests/pb/test_decoder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <sstream>

#include "tests/test_util.h"
#include "tests/upb_test.h"
Expand Down Expand Up @@ -176,6 +177,13 @@ string cat(const string& a, const string& b,
return ret;
}

template <typename T>
string num2string(T num) {
std::ostringstream ss;
ss << num;
return ss.str();
}

string varint(uint64_t x) {
char buf[UPB_PB_VARINT_MAX_LEN];
size_t len = upb_vencode64(x, buf);
Expand All @@ -202,6 +210,11 @@ string submsg(uint32_t fn, const string& buf) {
return cat( tag(fn, UPB_WIRE_TYPE_DELIMITED), delim(buf) );
}

string group(uint32_t fn, const string& buf) {
return cat(tag(fn, UPB_WIRE_TYPE_START_GROUP), buf,
tag(fn, UPB_WIRE_TYPE_END_GROUP));
}

// Like delim()/submsg(), but intentionally encodes an incorrect length.
// These help test when a delimited boundary doesn't land in the right place.
string badlen_delim(int err, const string& buf) {
Expand Down Expand Up @@ -557,15 +570,14 @@ uint32_t Hash(const string& proto, const string* expected_output, size_t seam1,
}

void CheckBytesParsed(const upb::pb::Decoder& decoder, size_t ofs) {
// We could have parsed as many as 10 bytes fewer than what the decoder
// previously accepted, since we can buffer up to 10 partial bytes internally
// before accumulating an entire value.
const int MAX_BUFFERED = 10;

// We can't have parsed more data than the decoder callback is telling us it
// parsed.
ASSERT(decoder.BytesParsed() <= ofs);
ASSERT(ofs <= (decoder.BytesParsed() + MAX_BUFFERED));

// The difference between what we've decoded and what the decoder has accepted
// represents the internally buffered amount. This amount should not exceed
// this value which comes from decoder.int.h.
ASSERT(ofs <= (decoder.BytesParsed() + UPB_DECODER_MAX_RESIDUAL_BYTES));
}

static bool parse(VerboseParserEnvironment* env,
Expand All @@ -582,7 +594,7 @@ static bool parse(VerboseParserEnvironment* env,
void do_run_decoder(VerboseParserEnvironment* env, upb::pb::Decoder* decoder,
const string& proto, const string* expected_output,
size_t i, size_t j, bool may_skip) {
env->Reset(proto.c_str(), proto.size(), may_skip);
env->Reset(proto.c_str(), proto.size(), may_skip, expected_output == NULL);
decoder->Reset();

testhash = Hash(proto, expected_output, i, j, may_skip);
Expand All @@ -598,7 +610,14 @@ void do_run_decoder(VerboseParserEnvironment* env, upb::pb::Decoder* decoder,
PrintBinary(proto);
fprintf(stderr, "\n");
if (expected_output) {
fprintf(stderr, "Expected output: %s\n", expected_output->c_str());
if (test_mode == ALL_HANDLERS) {
fprintf(stderr, "Expected output: %s\n", expected_output->c_str());
} else if (test_mode == NO_HANDLERS) {
fprintf(stderr,
"No handlers are registered, BUT if they were "
"the expected output would be: %s\n",
expected_output->c_str());
}
} else {
fprintf(stderr, "Expected to FAIL\n");
}
Expand All @@ -610,26 +629,20 @@ void do_run_decoder(VerboseParserEnvironment* env, upb::pb::Decoder* decoder,
parse(env, *decoder, -1) &&
env->End();

ASSERT(ok == env->status().ok());
ASSERT(env->CheckConsistency());

if (test_mode == ALL_HANDLERS) {
if (expected_output) {
if (output != *expected_output) {
fprintf(stderr, "Text mismatch: '%s' vs '%s'\n",
output.c_str(), expected_output->c_str());
}
if (!ok) {
fprintf(stderr, "Failed: %s\n", env->status().error_message());
}
ASSERT(ok);
ASSERT(output == *expected_output);
} else {
if (ok) {
fprintf(stderr, "Didn't expect ok result, but got output: '%s'\n",
output.c_str());
} else if (filter_hash) {
fprintf(stderr, "Failed as we expected, with message: %s\n",
env->status().error_message());
}
ASSERT(!ok);
}
Expand Down Expand Up @@ -657,6 +670,25 @@ void run_decoder(const string& proto, const string* expected_output) {
const static string thirty_byte_nop = cat(
tag(NOP_FIELD, UPB_WIRE_TYPE_DELIMITED), delim(string(30, 'X')) );

// Indents and wraps text as if it were a submessage with this field number
string wrap_text(int32_t fn, const string& text) {
string wrapped_text = text;
size_t pos = 0;
string replace_with = "\n ";
while ((pos = wrapped_text.find("\n", pos)) != string::npos &&
pos != wrapped_text.size() - 1) {
wrapped_text.replace(pos, 1, replace_with);
pos += replace_with.size();
}
wrapped_text = cat(
LINE("<"),
num2string(fn), LINE(":{")
" ", wrapped_text,
LINE("}")
LINE(">"));
return wrapped_text;
}

void assert_successful_parse(const string& proto,
const char *expected_fmt, ...) {
string expected_text;
Expand All @@ -668,16 +700,27 @@ void assert_successful_parse(const string& proto,
// repeat once with no-op padding data at the end of buffer.
run_decoder(proto, &expected_text);
run_decoder(cat( proto, thirty_byte_nop ), &expected_text);

// Test that this also works when wrapped in a submessage or group.
// Indent the expected text one level and wrap it.
string wrapped_text1 = wrap_text(UPB_DESCRIPTOR_TYPE_MESSAGE, expected_text);
string wrapped_text2 = wrap_text(UPB_DESCRIPTOR_TYPE_GROUP, expected_text);

run_decoder(submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, proto), &wrapped_text1);
run_decoder(group(UPB_DESCRIPTOR_TYPE_GROUP, proto), &wrapped_text2);
}

void assert_does_not_parse_at_eof(const string& proto) {
run_decoder(proto, NULL);

// Also test that we fail to parse at end-of-submessage, not just
// end-of-message.
run_decoder(submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, proto), NULL);
run_decoder(cat(submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, proto), thirty_byte_nop),
NULL);
// end-of-message. But skip this if we have no handlers, because in that
// case we won't descend into the submessage.
if (test_mode != NO_HANDLERS) {
run_decoder(submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, proto), NULL);
run_decoder(cat(submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, proto),
thirty_byte_nop), NULL);
}
}

void assert_does_not_parse(const string& proto) {
Expand Down Expand Up @@ -905,11 +948,13 @@ void test_invalid() {
submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, string(" "))));

// Test exceeding the resource limit of stack depth.
string buf;
for (int i = 0; i <= MAX_NESTING; i++) {
buf.assign(submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, buf));
if (test_mode != NO_HANDLERS) {
string buf;
for (int i = 0; i <= MAX_NESTING; i++) {
buf.assign(submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, buf));
}
assert_does_not_parse(buf);
}
assert_does_not_parse(buf);
}

void test_valid() {
Expand Down Expand Up @@ -986,6 +1031,15 @@ void test_valid() {
"<\n>\n");

// Unknown field inside a known submessage.
assert_successful_parse(
submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, submsg(12345, string(" "))),
LINE("<")
LINE("%u:{")
LINE(" <")
LINE(" >")
LINE("}")
LINE(">"), UPB_DESCRIPTOR_TYPE_MESSAGE);

assert_successful_parse(
cat (submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, submsg(12345, string(" "))),
tag(UPB_DESCRIPTOR_TYPE_INT32, UPB_WIRE_TYPE_VARINT),
Expand Down Expand Up @@ -1162,7 +1216,9 @@ void test_valid() {
indentbuf(&textbuf, total - i - 1);
textbuf.append(">\n");
}
assert_successful_parse(buf, "%s", textbuf.c_str());
// Have to use run_decoder directly, because we are at max nesting and can't
// afford the extra nesting that assert_successful_parse() will do.
run_decoder(buf, &textbuf);
}

upb::reffed_ptr<const upb::pb::DecoderMethod> NewMethod(
Expand Down Expand Up @@ -1207,10 +1263,11 @@ upb::reffed_ptr<const upb::pb::DecoderMethod> method =
upb::Sink sink(method->dest_handlers(), &closures[0]);
upb::pb::Decoder* decoder = CreateDecoder(env.env(), method.get(), &sink);
env.ResetBytesSink(decoder->input());
env.Reset(testdata[i].data, testdata[i].length, true);
env.Reset(testdata[i].data, testdata[i].length, true, false);
ASSERT(env.Start());
ASSERT(env.ParseBuffer(-1));
ASSERT(env.End());
ASSERT(env.CheckConsistency());
}
}

Expand Down
55 changes: 47 additions & 8 deletions tests/test_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,16 +29,35 @@ class VerboseParserEnvironment {
public:
// Pass verbose=true to print detailed diagnostics to stderr.
VerboseParserEnvironment(bool verbose) : verbose_(verbose) {
env_.ReportErrorsTo(&status_);
env_.SetErrorFunction(&VerboseParserEnvironment::OnError, this);
}

void Reset(const char *buf, size_t len, bool may_skip) {
static bool OnError(void *ud, const upb::Status* status) {
VerboseParserEnvironment* env = static_cast<VerboseParserEnvironment*>(ud);

env->saw_error_ = true;

if (env->expect_error_ && env->verbose_) {
fprintf(stderr, "Encountered error, as expected: ");
} else if (!env->expect_error_) {
fprintf(stderr, "Encountered unexpected error: ");
} else {
return false;
}

fprintf(stderr, "%s\n", status->error_message());
return false;
}

void Reset(const char *buf, size_t len, bool may_skip, bool expect_error) {
buf_ = buf;
len_ = len;
ofs_ = 0;
expect_error_ = expect_error;
saw_error_ = false;
end_ok_set_ = false;
skip_until_ = may_skip ? 0 : -1;
skipped_with_null_ = false;
status_.Clear();
}

// The user should call a series of:
Expand All @@ -63,7 +82,26 @@ class VerboseParserEnvironment {
if (verbose_) {
fprintf(stderr, "Calling end()\n");
}
return sink_->End();
end_ok_ = sink_->End();
end_ok_set_ = true;

return end_ok_;
}

bool CheckConsistency() {
/* If we called end (which we should only do when previous bytes are fully
* accepted), then end() should return true iff there were no errors. */
if (end_ok_set_ && end_ok_ != !saw_error_) {
fprintf(stderr, "End() status and saw_error didn't match.\n");
return false;
}

if (expect_error_ && !saw_error_) {
fprintf(stderr, "Expected error but saw none.\n");
return false;
}

return true;
}

bool ParseBuffer(int bytes) {
Expand Down Expand Up @@ -117,7 +155,7 @@ class VerboseParserEnvironment {
}
}

if (!status_.ok())
if (saw_error_)
return false;

if (parsed > bytes && skip_until_ >= 0) {
Expand All @@ -133,22 +171,23 @@ class VerboseParserEnvironment {
sink_ = sink;
}

const upb::Status& status() { return status_; }

size_t ofs() { return ofs_; }
upb::Environment* env() { return &env_; }

bool SkippedWithNull() { return skipped_with_null_; }

private:
upb::Environment env_;
upb::Status status_;
upb::BytesSink* sink_;
const char* buf_;
size_t len_;
bool verbose_;
size_t ofs_;
void *subc_;
bool expect_error_;
bool saw_error_;
bool end_ok_;
bool end_ok_set_;

// When our parse call returns a value greater than the number of bytes
// we passed in, the decoder is indicating to us that the next N bytes
Expand Down
7 changes: 6 additions & 1 deletion upb/pb/compile_decoder.c
Original file line number Diff line number Diff line change
Expand Up @@ -596,7 +596,12 @@ static void generate_msgfield(compiler *c, const upb_fielddef *f,

if (!sub_m) {
/* Don't emit any code for this field at all; it will be parsed as an
* unknown field. */
* unknown field.
*
* TODO(haberman): we should change this to parse it as a string field
* instead. It will probably be faster, but more importantly, once we
* start vending unknown fields, a field shouldn't be treated as unknown
* just because it doesn't have subhandlers registered. */
return;
}

Expand Down
Loading

0 comments on commit 77d45ed

Please sign in to comment.