Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Decoder redesign in preparation for packed fields and start/endseq.

  • Loading branch information...
commit f74534b42ac9ac8b0ff496cb0da83f1201bbf8da 1 parent 4a99abb
@haberman authored
View
13 Makefile
@@ -77,6 +77,7 @@ CORE= \
src/upb_string.c \
src/upb_def.c \
src/upb_msg.c \
+ src/upb_varint.c \
# Common encoders/decoders -- you're almost certain to want these.
STREAM= \
@@ -211,9 +212,13 @@ SIMPLE_TESTS= \
tests/test_string \
tests/test_def \
tests/test_varint \
- tests/tests
-# tests/test_decoder \
- tests/test_stream \
+ tests/tests \
+
+INTERACTIVE_TESTS= \
+ tests/test_decoder \
+
+# tests/test_stream \
+
SIMPLE_CXX_TESTS= \
tests/test_table
@@ -225,7 +230,7 @@ VARIADIC_TESTS= \
TESTS=$(SIMPLE_TESTS) $(SIMPLE_CXX_TESTS) $(VARIADIC_TESTS)
-tests: $(TESTS)
+tests: $(TESTS) $(INTERACTIVE_TESTS)
$(TESTS): $(LIBUPB)
tests/tests: tests/test.proto.pb
View
1  perf-regression-test.py
@@ -10,6 +10,7 @@
# Generate numbers for baseline.
rm -rf perf-tmp
git clone . perf-tmp
+cp perf-tests.sh perf-tmp
(cd perf-tmp && ./perf-tests.sh upb)
cp perf-tmp/perf-tests.out perf-tests.baseline
View
12 perf-tests.sh
@@ -16,20 +16,16 @@ run_with_flags () {
NAME=$2
make clean
- echo "$FLAGS -fprofile-generate" > perf-cppflags
- make upb_benchmarks
- make benchmark
-
- make clean_leave_profile
- echo "$FLAGS -fprofile-use" > perf-cppflags
+ echo "$FLAGS" > perf-cppflags
make upb_benchmarks
make benchmark | sed -e "s/^/$NAME./g" | tee -a perf-tests.out
}
-if [ x`uname -m` = xx86_64 ]; then
+#if [ x`uname -m` = xx86_64 ]; then
run_with_flags "-DNDEBUG -m32" "plain32"
run_with_flags "-DNDEBUG -fomit-frame-pointer -m32" "omitfp32"
-fi
+#fi
run_with_flags "-DNDEBUG " "plain"
run_with_flags "-DNDEBUG -fomit-frame-pointer" "omitfp"
+run_with_flags "-DNDEBUG -DUPB_USE_JIT_X64" "jit"
View
47 src/upb.c
@@ -13,31 +13,30 @@
#include "upb_string.h"
#define alignof(t) offsetof(struct { char c; t x; }, x)
-#define TYPE_INFO(wire_type, ctype, allows_delimited, inmemory_type) \
- {alignof(ctype), sizeof(ctype), wire_type, \
- (1 << wire_type) | (allows_delimited << UPB_WIRE_TYPE_DELIMITED), \
- UPB_TYPE(inmemory_type), #ctype},
+#define TYPE_INFO(wire_type, ctype, inmemory_type) \
+ {alignof(ctype), sizeof(ctype), wire_type, UPB_TYPE(inmemory_type), #ctype},
const upb_type_info upb_types[] = {
- {0, 0, 0, 0, 0, ""}, // There is no type 0.
- TYPE_INFO(UPB_WIRE_TYPE_64BIT, double, 1, DOUBLE) // DOUBLE
- TYPE_INFO(UPB_WIRE_TYPE_32BIT, float, 1, FLOAT) // FLOAT
- TYPE_INFO(UPB_WIRE_TYPE_VARINT, int64_t, 1, INT64) // INT64
- TYPE_INFO(UPB_WIRE_TYPE_VARINT, uint64_t, 1, UINT64) // UINT64
- TYPE_INFO(UPB_WIRE_TYPE_VARINT, int32_t, 1, INT32) // INT32
- TYPE_INFO(UPB_WIRE_TYPE_64BIT, uint64_t, 1, UINT64) // FIXED64
- TYPE_INFO(UPB_WIRE_TYPE_32BIT, uint32_t, 1, UINT32) // FIXED32
- TYPE_INFO(UPB_WIRE_TYPE_VARINT, bool, 1, BOOL) // BOOL
- TYPE_INFO(UPB_WIRE_TYPE_DELIMITED, void*, 1, STRING) // STRING
- TYPE_INFO(UPB_WIRE_TYPE_START_GROUP, void*, 0, MESSAGE) // GROUP
- TYPE_INFO(UPB_WIRE_TYPE_DELIMITED, void*, 1, MESSAGE) // MESSAGE
- TYPE_INFO(UPB_WIRE_TYPE_DELIMITED, void*, 1, STRING) // BYTES
- TYPE_INFO(UPB_WIRE_TYPE_VARINT, uint32_t, 1, UINT32) // UINT32
- TYPE_INFO(UPB_WIRE_TYPE_VARINT, uint32_t, 1, INT32) // ENUM
- TYPE_INFO(UPB_WIRE_TYPE_32BIT, int32_t, 1, INT32) // SFIXED32
- TYPE_INFO(UPB_WIRE_TYPE_64BIT, int64_t, 1, INT64) // SFIXED64
- TYPE_INFO(UPB_WIRE_TYPE_VARINT, int32_t, 1, INT32) // SINT32
- TYPE_INFO(UPB_WIRE_TYPE_VARINT, int64_t, 1, INT64) // SINT64
+ {0, 0, 0, 0, ""}, // There is no type 0.
+ TYPE_INFO(UPB_WIRE_TYPE_64BIT, double, DOUBLE) // DOUBLE
+ TYPE_INFO(UPB_WIRE_TYPE_32BIT, float, FLOAT) // FLOAT
+ TYPE_INFO(UPB_WIRE_TYPE_VARINT, int64_t, INT64) // INT64
+ TYPE_INFO(UPB_WIRE_TYPE_VARINT, uint64_t, UINT64) // UINT64
+ TYPE_INFO(UPB_WIRE_TYPE_VARINT, int32_t, INT32) // INT32
+ TYPE_INFO(UPB_WIRE_TYPE_64BIT, uint64_t, UINT64) // FIXED64
+ TYPE_INFO(UPB_WIRE_TYPE_32BIT, uint32_t, UINT32) // FIXED32
+ TYPE_INFO(UPB_WIRE_TYPE_VARINT, bool, BOOL) // BOOL
+ TYPE_INFO(UPB_WIRE_TYPE_DELIMITED, void*, STRING) // STRING
+ TYPE_INFO(UPB_WIRE_TYPE_START_GROUP, void*, MESSAGE) // GROUP
+ TYPE_INFO(UPB_WIRE_TYPE_DELIMITED, void*, MESSAGE) // MESSAGE
+ TYPE_INFO(UPB_WIRE_TYPE_DELIMITED, void*, STRING) // BYTES
+ TYPE_INFO(UPB_WIRE_TYPE_VARINT, uint32_t, UINT32) // UINT32
+ TYPE_INFO(UPB_WIRE_TYPE_VARINT, uint32_t, INT32) // ENUM
+ TYPE_INFO(UPB_WIRE_TYPE_32BIT, int32_t, INT32) // SFIXED32
+ TYPE_INFO(UPB_WIRE_TYPE_64BIT, int64_t, INT64) // SFIXED64
+ TYPE_INFO(UPB_WIRE_TYPE_VARINT, int32_t, INT32) // SINT32
+ TYPE_INFO(UPB_WIRE_TYPE_VARINT, int64_t, INT64) // SINT64
+ TYPE_INFO(UPB_WIRE_TYPE_END_GROUP, void*, INT64) // ENDGROUP
};
#ifdef NDEBUG
@@ -64,7 +63,7 @@ void upb_copyerr(upb_status *to, upb_status *from)
void upb_clearerr(upb_status *status) {
status->code = UPB_OK;
- upb_string_recycle(&status->str);
+ if (status->str) upb_string_recycle(&status->str);
}
void upb_printerr(upb_status *status) {
View
8 src/upb.h
@@ -126,7 +126,6 @@ typedef struct {
uint8_t align;
uint8_t size;
upb_wire_type_t native_wire_type;
- uint8_t allowed_wire_types; // For packable fields, also allows delimited.
uint8_t inmemory_type; // For example, INT32, SINT32, and SFIXED32 -> INT32
char *ctype;
} upb_type_info;
@@ -168,11 +167,11 @@ typedef int32_t upb_strlen_t;
// The type of a upb_value. This is like a upb_fieldtype_t, but adds the
// constant UPB_VALUETYPE_ARRAY to represent an array.
typedef uint8_t upb_valuetype_t;
+#define UPB_TYPE_ENDGROUP 19 // Need to increase if more real types are added!
#define UPB_VALUETYPE_ARRAY 32
#define UPB_VALUETYPE_BYTESRC 32
#define UPB_VALUETYPE_RAW 33
#define UPB_VALUETYPE_FIELDDEF 34
-#define UPB_TYPE_ENDGROUP 35
// A single .proto value. The owner must have an out-of-band way of knowing
// the type, so that it knows which union member to use.
@@ -231,11 +230,6 @@ UPB_VALUE_ACCESSORS(fielddef, fielddef, upb_fielddef*, UPB_VALUETYPE_FIELDDEF);
extern upb_value UPB_NO_VALUE;
-INLINE void upb_value_setraw(upb_value *val, uint64_t cval) {
- SET_TYPE(val->type, UPB_VALUETYPE_RAW);
- val->val.uint64 = cval;
-}
-
INLINE upb_atomic_refcount_t *upb_value_getrefcount(upb_value val) {
assert(val.type == UPB_TYPE(MESSAGE) ||
val.type == UPB_TYPE(STRING) ||
View
463 src/upb_decoder.c
@@ -19,44 +19,50 @@
#include "upb_decoder_x86.h"
#endif
-/* Decoding/Buffering of individual values ************************************/
+// A group continues until an END_GROUP tag is seen.
+#define UPB_GROUPEND UINT32_MAX
+// A non-packed repeated field ends when a different field is seen (or submsg end).
+#define UPB_REPEATEDEND (UINT32_MAX-1)
-// Performs zig-zag decoding, which is used by sint32 and sint64.
-INLINE int32_t upb_zzdec_32(uint32_t n) { return (n >> 1) ^ -(int32_t)(n & 1); }
-INLINE int64_t upb_zzdec_64(uint64_t n) { return (n >> 1) ^ -(int64_t)(n & 1); }
+// It's unfortunate that we have to micro-manage the compiler this way,
+// especially since this tuning is necessarily specific to one hardware
+// configuration. But empirically on a Core i7, performance increases 30-50%
+// with these annotations. In every instance where these appear, gcc 4.2.1
+// made the wrong decision and degraded performance in benchmarks.
+#define FORCEINLINE static __attribute__((always_inline))
+#define NOINLINE static __attribute__((noinline))
+
+static void upb_decoder_exit(upb_decoder *d) { siglongjmp(d->exitjmp, 1); }
+
+/* Decoding/Buffering of wire types *******************************************/
#define UPB_MAX_VARINT_ENCODED_SIZE 10
-INLINE void upb_decoder_advance(upb_decoder *d, size_t len) {
- d->ptr += len;
-}
+static void upb_decoder_advance(upb_decoder *d, size_t len) { d->ptr += len; }
+static size_t upb_decoder_bufleft(upb_decoder *d) { return d->end - d->ptr; }
-INLINE size_t upb_decoder_offset(upb_decoder *d) {
+size_t upb_decoder_offset(upb_decoder *d) {
size_t offset = d->buf_stream_offset;
if (d->buf) offset += (d->ptr - d->buf);
return offset;
}
-INLINE size_t upb_decoder_bufleft(upb_decoder *d) {
- return d->end - d->ptr;
-}
-
-INLINE void upb_dstate_setmsgend(upb_decoder *d) {
- uint32_t end_offset = d->dispatcher.top->end_offset;
- d->submsg_end = (end_offset == UINT32_MAX) ?
- (void*)UINTPTR_MAX : d->buf + end_offset;
+static void upb_decoder_setmsgend(upb_decoder *d) {
+ uint32_t end = d->dispatcher.top->end_offset;
+ d->submsg_end = (end == UINT32_MAX) ? (void*)UINTPTR_MAX : d->buf + end;
}
// Pulls the next buffer from the bytesrc. Should be called only when the
// current buffer is completely empty.
-static bool upb_pullbuf(upb_decoder *d) {
+static void upb_pullbuf(upb_decoder *d, bool need) {
assert(upb_decoder_bufleft(d) == 0);
int32_t last_buf_len = d->buf ? upb_string_len(d->bufstr) : -1;
upb_string_recycle(&d->bufstr);
if (!upb_bytesrc_getstr(d->bytesrc, d->bufstr, d->status)) {
d->buf = NULL;
d->end = NULL;
- return false;
+ if (need) upb_seterr(d->status, UPB_ERROR, "Unexpected EOF.");
+ upb_decoder_exit(d);
}
if (last_buf_len != -1) {
d->buf_stream_offset += last_buf_len;
@@ -70,290 +76,256 @@ static bool upb_pullbuf(upb_decoder *d) {
d->jit_end = d->end - 20;
upb_string_recycle(&d->tmp);
upb_string_substr(d->tmp, d->bufstr, 0, 0);
- upb_dstate_setmsgend(d);
- return true;
+ upb_decoder_setmsgend(d);
}
// Called only from the slow path, this function copies the next "len" bytes
-// from the stream to "data", adjusting the dstate appropriately.
-static bool upb_getbuf(upb_decoder *d, void *data, size_t bytes_wanted) {
+// from the stream to "data", adjusting the decoder state appropriately.
+static void upb_getbuf(upb_decoder *d, void *data, size_t bytes, bool need) {
while (1) {
- size_t to_copy = UPB_MIN(bytes_wanted, upb_decoder_bufleft(d));
+ size_t to_copy = UPB_MIN(bytes, upb_decoder_bufleft(d));
memcpy(data, d->ptr, to_copy);
upb_decoder_advance(d, to_copy);
- bytes_wanted -= to_copy;
- if (bytes_wanted == 0) return true;
- if (!upb_pullbuf(d)) return false;
+ bytes -= to_copy;
+ if (bytes == 0) return;
+ upb_pullbuf(d, need);
}
}
-// We use this path when we don't have UPB_MAX_VARINT_ENCODED_SIZE contiguous
-// bytes available in our current buffer. We don't inline this because we
-// accept that it will be slow and we don't want to pay for two copies of it.
-static bool upb_decode_varint_slow(upb_decoder *d, upb_value *val) {
- char byte = 0x80;
- uint64_t val64 = 0;
+NOINLINE uint64_t upb_decode_varint_slow(upb_decoder *d, bool need) {
+ uint8_t byte = 0x80;
+ uint64_t u64 = 0;
int bitpos;
- for(bitpos = 0;
- bitpos < 70 && (byte & 0x80) && upb_getbuf(d, &byte, 1);
- bitpos += 7)
- val64 |= ((uint64_t)byte & 0x7F) << bitpos;
-
- if(bitpos == 70) {
- upb_seterr(d->status, UPB_ERROR,
- "Varint was unterminated after 10 bytes.\n");
- return false;
- } else if (d->status->code == UPB_EOF && bitpos == 0) {
- // Regular EOF.
- return false;
- } else if (d->status->code == UPB_EOF && (byte & 0x80)) {
- upb_seterr(d->status, UPB_ERROR,
- "Provided data ended in the middle of a varint.\n");
- return false;
- } else {
- // Success.
- upb_value_setraw(val, val64);
- return true;
+ for(bitpos = 0; bitpos < 70 && (byte & 0x80); bitpos += 7) {
+ upb_getbuf(d, &byte, 1, need);
+ u64 |= ((uint64_t)byte & 0x7F) << bitpos;
}
-}
-typedef struct {
- upb_wire_type_t wire_type;
- upb_field_number_t field_number;
-} upb_tag;
+ if(bitpos == 70 && (byte & 0x80)) {
+ upb_seterr(d->status, UPB_ERROR, "Unterminated varint.\n");
+ upb_decoder_exit(d);
+ }
+ return u64;
+}
-INLINE bool upb_decode_tag(upb_decoder *d, uint32_t *tag) {
+// For tags and delimited lengths, which must be <=32bit and are usually small.
+FORCEINLINE uint32_t upb_decode_varint32(upb_decoder *d, bool need) {
const char *p = d->ptr;
- upb_value val;
- // Nearly all tag varints will be either 1 byte (1-16) or 2 bytes (17-2048).
+ uint32_t ret;
+ uint64_t u64;
+ // Nearly all will be either 1 byte (1-16) or 2 bytes (17-2048).
if (upb_decoder_bufleft(d) < 2) goto slow; // unlikely.
- *tag = *p & 0x7f;
+ ret = *p & 0x7f;
if ((*(p++) & 0x80) == 0) goto done; // predictable if fields are in order
- *tag |= (*p & 0x7f) << 7;
+ ret |= (*p & 0x7f) << 7;
if ((*(p++) & 0x80) == 0) goto done; // likely
slow:
- // Decode a full varint starting over from ptr.
- if (!upb_decode_varint_slow(d, &val)) return false;
- *tag = upb_value_getint64(val);
- p = d->ptr; // Trick the next line into not overwriting us.
+ u64 = upb_decode_varint_slow(d, need);
+ if (u64 > 0xffffffff) {
+ upb_seterr(d->status, UPB_ERROR, "Unterminated 32-bit varint.\n");
+ upb_decoder_exit(d);
+ }
+ ret = (uint32_t)u64;
+ p = d->ptr; // Turn the next line into a nop.
done:
upb_decoder_advance(d, p - d->ptr);
- return true;
+ return ret;
}
-INLINE bool upb_decode_varint(upb_decoder *d, upb_value *val) {
+FORCEINLINE uint64_t upb_decode_varint(upb_decoder *d) {
if (upb_decoder_bufleft(d) >= 16) {
// Common (fast) case.
upb_decoderet r = upb_vdecode_fast(d->ptr);
if (r.p == NULL) {
upb_seterr(d->status, UPB_ERROR, "Unterminated varint.\n");
- return false;
+ upb_decoder_exit(d);
}
- upb_value_setraw(val, r.val);
upb_decoder_advance(d, r.p - d->ptr);
- return true;
+ return r.val;
} else {
- return upb_decode_varint_slow(d, val);
+ return upb_decode_varint_slow(d, true);
}
}
-INLINE bool upb_decode_fixed(upb_decoder *d, size_t bytes, upb_value *val) {
+FORCEINLINE void upb_decode_fixed(upb_decoder *d, void *val, size_t bytes) {
if (upb_decoder_bufleft(d) >= bytes) {
// Common (fast) case.
memcpy(val, d->ptr, bytes);
upb_decoder_advance(d, bytes);
} else {
- if (!upb_getbuf(d, val, bytes)) return false;
+ upb_getbuf(d, val, bytes, true);
}
- return true;
}
-// "val" initially holds the length of the string, this is replaced by the
-// contents of the string.
-INLINE bool upb_decode_string(upb_decoder *d, upb_value *val,
- upb_string **str) {
- upb_string_recycle(str);
- uint32_t strlen = upb_value_getint32(*val);
+FORCEINLINE uint32_t upb_decode_fixed32(upb_decoder *d) {
+ uint32_t u32;
+ upb_decode_fixed(d, &u32, sizeof(uint32_t));
+ return u32;
+}
+FORCEINLINE uint64_t upb_decode_fixed64(upb_decoder *d) {
+ uint64_t u64;
+ upb_decode_fixed(d, &u64, sizeof(uint64_t));
+ return u64;
+}
+
+INLINE upb_string *upb_decode_string(upb_decoder *d) {
+ upb_string_recycle(&d->tmp);
+ uint32_t strlen = upb_decode_varint32(d, true);
if (upb_decoder_bufleft(d) >= strlen) {
// Common (fast) case.
- upb_string_substr(*str, d->bufstr, d->ptr - d->buf, strlen);
+ upb_string_substr(d->tmp, d->bufstr, d->ptr - d->buf, strlen);
upb_decoder_advance(d, strlen);
} else {
- if (!upb_getbuf(d, upb_string_getrwbuf(*str, strlen), strlen))
- return false;
+ upb_getbuf(d, upb_string_getrwbuf(d->tmp, strlen), strlen, true);
}
- upb_value_setstr(val, *str);
- return true;
+ return d->tmp;
}
-
-/* The main decoding loop *****************************************************/
-
-extern upb_wire_type_t upb_expected_wire_types[];
-// Returns true if wt is the correct on-the-wire type for ft.
-INLINE bool upb_check_type(upb_wire_type_t wt, upb_fieldtype_t ft) {
- // This doesn't currently support packed arrays.
- return upb_types[ft].native_wire_type == wt;
+INLINE void upb_pop(upb_decoder *d) {
+ //if (d->dispatcher.top->end_offset == UPB_REPEATEDEND)
+ // upb_dispatch_endseq(&d->dispatcher);
+ d->f = d->dispatcher.top->f;
+ upb_dispatch_endsubmsg(&d->dispatcher);
+ upb_decoder_setmsgend(d);
}
-static upb_flow_t upb_pop(upb_decoder *d) {
- upb_flow_t ret = upb_dispatch_endsubmsg(&d->dispatcher);
- upb_dstate_setmsgend(d);
- return ret;
+INLINE void upb_push(upb_decoder *d, upb_fieldent *f, uint32_t end) {
+ upb_dispatch_startsubmsg(&d->dispatcher, f, end);
+ upb_decoder_setmsgend(d);
}
-static upb_flow_t upb_decoder_skipsubmsg(upb_decoder *d) {
- if (d->dispatcher.top->f->type == UPB_TYPE(GROUP)) {
- fprintf(stderr, "upb_decoder: Can't skip groups yet.\n");
- abort();
- }
- upb_decoder_advance(d, d->dispatcher.top->end_offset - (d->ptr - d->buf));
+
+/* Decoding of .proto types ***************************************************/
+
+// Technically, we are losing data if we see a 32-bit varint that is not
+// properly sign-extended. We could detect this and error about the data loss,
+// but proto2 does not do this, so we pass.
+
+#define T(type, wt, valtype, convfunc) \
+ INLINE void upb_decode_ ## type(upb_decoder *d, upb_fieldent *f) { \
+ upb_value val; \
+ upb_value_set ## valtype(&val, (convfunc)(upb_decode_ ## wt(d))); \
+ upb_dispatch_value(&d->dispatcher, f, val); \
+ } \
+
+static double upb_asdouble(uint64_t n) { return *(double*)&n; }
+static float upb_asfloat(uint32_t n) { return *(float*)&n; }
+static int32_t upb_zzdec_32(uint32_t n) { return (n >> 1) ^ -(int32_t)(n & 1); }
+static int64_t upb_zzdec_64(uint64_t n) { return (n >> 1) ^ -(int64_t)(n & 1); }
+
+T(INT32, varint, int32, int32_t)
+T(INT64, varint, int64, int64_t)
+T(UINT32, varint, uint32, uint32_t)
+T(UINT64, varint, uint64, uint64_t)
+T(FIXED32, fixed32, uint32, uint32_t)
+T(FIXED64, fixed64, uint64, uint64_t)
+T(SFIXED32, fixed32, int32, int32_t)
+T(SFIXED64, fixed64, int64, int64_t)
+T(BOOL, varint, bool, bool)
+T(ENUM, varint, int32, int32_t)
+T(DOUBLE, fixed64, double, upb_asdouble)
+T(FLOAT, fixed32, float, upb_asfloat)
+T(SINT32, varint, int32, upb_zzdec_32)
+T(SINT64, varint, int64, upb_zzdec_64)
+T(STRING, string, str, upb_string*)
+
+static void upb_decode_GROUP(upb_decoder *d, upb_fieldent *f) {
+ upb_push(d, f, UPB_GROUPEND);
+}
+static void upb_endgroup(upb_decoder *d, upb_fieldent *f) {
+ (void)f;
upb_pop(d);
- return UPB_CONTINUE;
}
-
-static upb_flow_t upb_push(upb_decoder *d, upb_handlers_fieldent *f,
- uint32_t end_offset) {
- upb_flow_t flow = upb_dispatch_startsubmsg(&d->dispatcher, f, end_offset);
- upb_dstate_setmsgend(d);
- return flow;
+static void upb_decode_MESSAGE(upb_decoder *d, upb_fieldent *f) {
+ upb_push(d, f, upb_decode_varint32(d, true) + (d->ptr - d->buf));
}
-void upb_decoder_decode(upb_decoder *d, upb_status *status) {
- d->status = status;
-#define CHECK_FLOW(expr) \
- switch (expr) { \
- case UPB_BREAK: goto callback_err; \
- case UPB_SKIPSUBMSG: upb_decoder_skipsubmsg(d); continue; \
- default: break; /* continue normally. */ \
- }
-#define CHECK(expr) if (!expr) { assert(!upb_ok(status)); goto err; }
-
- CHECK(upb_pullbuf(d));
- if (upb_dispatch_startmsg(&d->dispatcher) != UPB_CONTINUE) goto err;
-
- // Main loop: executed once per tag/field pair.
- while(1) {
- // Check for end-of-submessage.
- while (d->ptr >= d->submsg_end) {
- if (d->ptr > d->submsg_end) {
- upb_seterr(d->status, UPB_ERROR, "Bad submessage end.");
- goto err;
- }
- CHECK_FLOW(upb_pop(d));
- }
+/* The main decoding loop *****************************************************/
+
+static void upb_unwind(upb_decoder *d) {
+ // TODO.
+ (void)d;
+}
+
+static void upb_delimend(upb_decoder *d) {
+ if (d->ptr > d->submsg_end) {
+ upb_seterr(d->status, UPB_ERROR, "Bad submessage end.");
+ upb_decoder_exit(d);
+ }
+ upb_pop(d);
+}
+static void upb_decoder_enterjit(upb_decoder *d) {
+ (void)d;
+#ifdef UPB_USE_JIT_X64
+ if (d->jit_code && d->dispatcher.top == d->dispatcher.stack && d->ptr < d->jit_end) {
// Decodes as many fields as possible, updating d->ptr appropriately,
// before falling through to the slow(er) path.
-#ifdef UPB_USE_JIT_X64
void (*upb_jit_decode)(upb_decoder *d) = (void*)d->jit_code;
- if (d->jit_code && d->dispatcher.top == d->dispatcher.stack && d->ptr < d->jit_end) {
- //const char *before = d->ptr;
- //fprintf(stderr, "Entering JIT, JIT bytes left: %zd\n", d->jit_end - d->ptr);
- upb_jit_decode(d);
- //fprintf(stderr, "Exiting JIT, parsed %zd bytes\n", d->ptr - before);
- //fprintf(stderr, "ptr: %p, effective_end: %p, jit_end: %p, effective_end-ptr=%d\n",
- // d->ptr, d->effective_end, d->jit_end, d->effective_end - d->ptr);
- }
+ upb_jit_decode(d);
+ }
#endif
+}
- // Parse/handle tag.
- uint32_t tag;
- if (!upb_decode_tag(d, &tag)) {
- if (status->code == UPB_EOF && upb_dispatcher_stackempty(&d->dispatcher)) {
- // Normal end-of-file.
- upb_clearerr(status);
- upb_dispatch_endmsg(&d->dispatcher, status);
- return;
- } else {
- if (status->code == UPB_EOF) {
- upb_seterr(status, UPB_ERROR,
- "Input ended in the middle of a submessage.");
- }
- goto err;
- }
+INLINE upb_fieldent *upb_decode_tag(upb_decoder *d) {
+ while (1) {
+ uint32_t tag = upb_decode_varint32(d, false);
+ upb_fieldent *f = upb_dispatcher_lookup(&d->dispatcher, tag);
+ if (f) {
+ d->f = f;
+ return f;
}
-
- // Decode wire data. Hopefully this branch will predict pretty well
- // since most types will read a varint here.
- upb_value val;
- uint8_t wire_type = tag & 0x7;
- switch (wire_type) {
- case UPB_WIRE_TYPE_START_GROUP:
- break; // Nothing to do now, below we will push appropriately.
- case UPB_WIRE_TYPE_END_GROUP:
- // Strictly speaking we should also check the field number here.
- if(d->dispatcher.top->f->type != UPB_TYPE(GROUP)) {
- upb_seterr(status, UPB_ERROR, "Unexpected END_GROUP tag.");
- goto err;
- }
- CHECK_FLOW(upb_pop(d));
- continue; // We have no value to dispatch.
- case UPB_WIRE_TYPE_VARINT:
+ switch (tag & 0x7) {
+ case UPB_WIRE_TYPE_VARINT: upb_decode_varint(d); break;
+ case UPB_WIRE_TYPE_32BIT: upb_decoder_advance(d, 4); break;
+ case UPB_WIRE_TYPE_64BIT: upb_decoder_advance(d, 8); break;
case UPB_WIRE_TYPE_DELIMITED:
- // For the delimited case we are parsing the length.
- CHECK(upb_decode_varint(d, &val));
- break;
- case UPB_WIRE_TYPE_32BIT:
- CHECK(upb_decode_fixed(d, 4, &val));
- break;
- case UPB_WIRE_TYPE_64BIT:
- CHECK(upb_decode_fixed(d, 8, &val));
+ upb_decoder_advance(d, upb_decode_varint32(d, true));
break;
}
+ // TODO: deliver to unknown field callback.
+ while (d->ptr >= d->submsg_end) upb_delimend(d);
+ }
- // Look up field by tag number.
- upb_dispatcher_field *f = upb_dispatcher_lookup(&d->dispatcher, tag);
-
- if (!f) {
- if (wire_type == UPB_WIRE_TYPE_DELIMITED)
- CHECK(upb_decode_string(d, &val, &d->tmp));
- // TODO.
- CHECK_FLOW(upb_dispatch_unknownval(&d->dispatcher, 0, UPB_NO_VALUE));
- continue;
- }
+ // Have to handle both packed and non-packed sequences of primitives.
+ //if (d->dispatcher.top->end_offset == UPB_REPEATEDEND && d->f != f) {
+ // upb_dispatch_endseq(&d->dispatcher);
+ //} else if (f->is_repeated_primitive) {
+ // if ((tag & 0x7) == UPB_WIRE_TYPE_DELIMITED) {
+ // upb_pushseq(d, f, upb_decode_varint32(d, true) + (d->ptr - d->buf));
+ // } else if (d->f != f) {
+ // upb_dispatch_startseq(d, f, UPB_REPEATEDEND);
+ // }
+ //}
+}
- // Perform any further massaging of the data now that we have the field's
- // type. Now we can distinguish strings from submessages, and we know
- // about zig-zag-encoded types.
- // TODO: handle packed encoding.
- // TODO: if we were being paranoid, we could check for 32-bit-varint types
- // that the top 32 bits all match the highest bit of the low 32 bits.
- // If this is not true we are losing data. But the main protobuf library
- // doesn't check this, and it would slow us down, so pass for now.
- switch (f->type) {
- case UPB_TYPE(GROUP):
- CHECK_FLOW(upb_push(d, f, UINT32_MAX));
- continue; // We have no value to dispatch.
- case UPB_TYPE(MESSAGE):
- CHECK_FLOW(upb_push(d, f, upb_value_getuint32(val) + (d->ptr - d->buf)));
- continue; // We have no value to dispatch.
- case UPB_TYPE(STRING):
- case UPB_TYPE(BYTES):
- CHECK(upb_decode_string(d, &val, &d->tmp));
- break;
- case UPB_TYPE(SINT32):
- upb_value_setint32(&val, upb_zzdec_32(upb_value_getint32(val)));
- break;
- case UPB_TYPE(SINT64):
- upb_value_setint64(&val, upb_zzdec_64(upb_value_getint64(val)));
- break;
- default:
-#ifndef NDEBUG
- val.type = upb_types[f->type].inmemory_type;
-#endif
- break; // Other types need no further processing at this point.
- }
- CHECK_FLOW(upb_dispatch_value(&d->dispatcher, f, val));
+void upb_decoder_onexit(upb_decoder *d) {
+ if (d->status->code == UPB_EOF && upb_dispatcher_stackempty(&d->dispatcher)) {
+ // Normal end-of-file.
+ upb_clearerr(d->status);
+ upb_dispatch_endmsg(&d->dispatcher, d->status);
+ } else {
+ if (d->status->code == UPB_EOF)
+ upb_seterr(d->status, UPB_ERROR, "Input ended mid-submessage.");
}
+}
-callback_err:
- if (upb_ok(status)) {
- upb_seterr(status, UPB_ERROR, "Callback returned UPB_BREAK");
+void upb_decoder_decode(upb_decoder *d, upb_status *status) {
+ if (sigsetjmp(d->exitjmp, 0)) {
+ upb_decoder_onexit(d);
+ return;
+ }
+ d->status = status;
+ upb_pullbuf(d, true);
+ upb_dispatch_startmsg(&d->dispatcher);
+ while(1) { // Main loop: executed once per tag/field pair.
+ while (d->ptr >= d->submsg_end) upb_delimend(d);
+ upb_decoder_enterjit(d);
+ // if (!d->dispatcher.top->is_packed)
+ upb_fieldent *f = upb_decode_tag(d);
+ f->decode(d, f);
}
-err:
- assert(!upb_ok(status));
}
void upb_decoder_init(upb_decoder *d, upb_handlers *handlers) {
@@ -363,9 +335,38 @@ void upb_decoder_init(upb_decoder *d, upb_handlers *handlers) {
if (d->dispatcher.handlers->should_jit) upb_decoder_makejit(d);
#endif
d->bufstr = NULL;
- d->buf = NULL;
d->tmp = NULL;
upb_string_recycle(&d->tmp);
+
+ // Set function pointers for each field's decode function.
+ for (int i = 0; i < handlers->msgs_len; i++) {
+ upb_msgent *m = &handlers->msgs[i];
+ for(upb_inttable_iter i = upb_inttable_begin(&m->fieldtab); !upb_inttable_done(i);
+ i = upb_inttable_next(&m->fieldtab, i)) {
+ upb_fieldent *f = upb_inttable_iter_value(i);
+ switch (f->type) {
+ case UPB_TYPE(INT32): f->decode = &upb_decode_INT32; break;
+ case UPB_TYPE(INT64): f->decode = &upb_decode_INT64; break;
+ case UPB_TYPE(UINT32): f->decode = &upb_decode_UINT32; break;
+ case UPB_TYPE(UINT64): f->decode = &upb_decode_UINT64; break;
+ case UPB_TYPE(FIXED32): f->decode = &upb_decode_FIXED32; break;
+ case UPB_TYPE(FIXED64): f->decode = &upb_decode_FIXED64; break;
+ case UPB_TYPE(SFIXED32): f->decode = &upb_decode_SFIXED32; break;
+ case UPB_TYPE(SFIXED64): f->decode = &upb_decode_SFIXED64; break;
+ case UPB_TYPE(BOOL): f->decode = &upb_decode_BOOL; break;
+ case UPB_TYPE(ENUM): f->decode = &upb_decode_ENUM; break;
+ case UPB_TYPE(DOUBLE): f->decode = &upb_decode_DOUBLE; break;
+ case UPB_TYPE(FLOAT): f->decode = &upb_decode_FLOAT; break;
+ case UPB_TYPE(SINT32): f->decode = &upb_decode_SINT32; break;
+ case UPB_TYPE(SINT64): f->decode = &upb_decode_SINT64; break;
+ case UPB_TYPE(STRING): f->decode = &upb_decode_STRING; break;
+ case UPB_TYPE(BYTES): f->decode = &upb_decode_STRING; break;
+ case UPB_TYPE(GROUP): f->decode = &upb_decode_GROUP; break;
+ case UPB_TYPE(MESSAGE): f->decode = &upb_decode_MESSAGE; break;
+ case UPB_TYPE_ENDGROUP: f->decode = &upb_endgroup; break;
+ }
+ }
+ }
}
void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc, void *closure) {
View
4 src/upb_decoder.h
@@ -17,6 +17,7 @@
#ifndef UPB_DECODER_H_
#define UPB_DECODER_H_
+#include <setjmp.h>
#include <stdbool.h>
#include <stdint.h>
#include "upb_stream.h"
@@ -60,6 +61,8 @@ struct _upb_decoder {
// MIN(end, submsg_end)
const char *effective_end;
+ upb_fieldent *f;
+
// Where we will store any errors that occur.
upb_status *status;
@@ -72,6 +75,7 @@ struct _upb_decoder {
char *debug_info;
struct dasm_State *dynasm;
+ sigjmp_buf exitjmp;
};
// A upb_decoder decodes the binary protocol buffer format, writing the data it
View
64 src/upb_decoder_x86.dasc
@@ -135,7 +135,7 @@ void upb_reg_jit_gdb(upb_decoder *d) {
|// Checks PTR for end-of-buffer.
|.macro check_eob, m
| cmp PTR, DECODER->effective_end
-|| if (m->endgroup_f) {
+|| if (m->is_group) {
| jae ->exit_jit
|| } else {
| jae =>m->jit_endofbuf_pclabel
@@ -194,7 +194,7 @@ void upb_reg_jit_gdb(upb_decoder *d) {
|
|.macro setmsgend, m
| mov rsi, DECODER->jit_end
-|| if (m->endgroup_f) {
+|| if (m->is_group) {
| mov64 rax, 0xffffffffffffffff
| mov qword DECODER->submsg_end, rax
| mov DECODER->effective_end, rsi
@@ -253,8 +253,8 @@ void upb_reg_jit_gdb(upb_decoder *d) {
// PTR should point to the beginning of the tag.
static void upb_decoder_jit_field(upb_decoder *d, uint32_t tag, uint32_t next_tag,
- upb_handlers_msgent *m,
- upb_handlers_fieldent *f, upb_handlers_fieldent *next_f) {
+ upb_msgent *m,
+ upb_fieldent *f, upb_fieldent *next_f) {
int tag_size = upb_value_size(tag);
// PC-label for the dispatch table.
@@ -388,7 +388,7 @@ static void upb_decoder_jit_field(upb_decoder *d, uint32_t tag, uint32_t next_ta
| mov DECODER->dispatcher.top, rax
| mov FRAME, rax
- upb_handlers_msgent *sub_m = upb_handlers_getmsgent(d->dispatcher.handlers, f);
+ upb_msgent *sub_m = upb_handlers_getmsgent(d->dispatcher.handlers, f);
if (sub_m->jit_parent_field_done_pclabel != UPB_MULTIPLE) {
| jmp =>sub_m->jit_startmsg_pclabel;
} else {
@@ -433,10 +433,11 @@ static void upb_decoder_jit_field(upb_decoder *d, uint32_t tag, uint32_t next_ta
}
static int upb_compare_uint32(const void *a, const void *b) {
+ // TODO: always put ENDGROUP at the end.
return *(uint32_t*)a - *(uint32_t*)b;
}
-static void upb_decoder_jit_msg(upb_decoder *d, upb_handlers_msgent *m) {
+static void upb_decoder_jit_msg(upb_decoder *d, upb_msgent *m) {
|=>m->jit_startmsg_pclabel:
// Call startmsg handler (if any):
if (m->startmsg != upb_startmsg_nop) {
@@ -466,32 +467,24 @@ static void upb_decoder_jit_msg(upb_decoder *d, upb_handlers_msgent *m) {
}
qsort(keys, num_keys, sizeof(uint32_t), &upb_compare_uint32);
-
- upb_handlers_fieldent *last_f = NULL;
+ upb_fieldent *last_f = NULL;
uint32_t last_tag = 0;
for(int i = 0; i < num_keys; i++) {
uint32_t key = keys[i];
- upb_handlers_fieldent *f = upb_inttable_lookup(&m->fieldtab, key);
- uint32_t tag = upb_vencode(key);
+ upb_fieldent *f = upb_inttable_lookup(&m->fieldtab, key);
+ uint32_t tag = upb_vencode32(key);
if (last_f) upb_decoder_jit_field(d, last_tag, tag, m, last_f, f);
last_tag = tag;
last_f = f;
}
+ upb_decoder_jit_field(d, last_tag, 0, m, last_f, NULL);
free(keys);
- if (m->endgroup_f) {
- uint32_t tag = m->endgroup_f->number << 3 | UPB_WIRE_TYPE_END_GROUP;
- upb_decoder_jit_field(d, last_tag, tag, m, last_f, m->endgroup_f);
- upb_decoder_jit_field(d, tag, 0, m, m->endgroup_f, NULL);
- } else {
- upb_decoder_jit_field(d, last_tag, 0, m, last_f, NULL);
- }
-
// --------- New code section (does not fall through) ------------------------
// End-of-buf / end-of-message.
- if (!m->endgroup_f) {
+ if (!m->is_group) {
// This case doesn't exist for groups, because there eob really means
// eob, so that case just exits the jit directly.
|=>m->jit_endofbuf_pclabel:
@@ -560,16 +553,14 @@ static void upb_decoder_jit(upb_decoder *d) {
| callp abort
}
-void upb_decoder_jit_assignfieldlabs(upb_handlers_fieldent *f,
+void upb_decoder_jit_assignfieldlabs(upb_fieldent *f,
uint32_t *pclabel_count) {
f->jit_pclabel = (*pclabel_count)++;
f->jit_pclabel_notypecheck = (*pclabel_count)++;
f->jit_submsg_done_pclabel = (*pclabel_count)++;
}
-void upb_decoder_jit_assignmsglabs(upb_handlers *h,
- upb_handlers_msgent *m,
- uint32_t *pclabel_count) {
+void upb_decoder_jit_assignmsglabs(upb_msgent *m, uint32_t *pclabel_count) {
m->jit_startmsg_pclabel = (*pclabel_count)++;
m->jit_endofbuf_pclabel = (*pclabel_count)++;
m->jit_endofmsg_pclabel = (*pclabel_count)++;
@@ -581,30 +572,22 @@ void upb_decoder_jit_assignmsglabs(upb_handlers *h,
i = upb_inttable_next(&m->fieldtab, i)) {
uint32_t key = upb_inttable_iter_key(i);
m->max_field_number = UPB_MAX(m->max_field_number, key);
- upb_handlers_fieldent *f = upb_inttable_iter_value(i);
+ upb_fieldent *f = upb_inttable_iter_value(i);
upb_decoder_jit_assignfieldlabs(f, pclabel_count);
- if (f->type == UPB_TYPE(GROUP)) {
- upb_handlers_msgent *sub_m = upb_handlers_getmsgent(h, f);
- sub_m->endgroup_f = malloc(sizeof(*sub_m->endgroup_f));
- memcpy(sub_m->endgroup_f, f, sizeof(*f));
- sub_m->endgroup_f->type = UPB_TYPE_ENDGROUP;
- upb_decoder_jit_assignfieldlabs(sub_m->endgroup_f, pclabel_count);
- }
}
// XXX: Won't work for large field numbers; will need to use a upb_table.
- // +2 to cover group case, in case group number is larger than all tags.
- m->tablearray = malloc((m->max_field_number + 2) * sizeof(void*));
+ m->tablearray = malloc((m->max_field_number + 1) * sizeof(void*));
}
// Second pass: for messages that have only one parent, link them to the field
// from which they are called.
-void upb_decoder_jit_assignmsglabs2(upb_handlers *h, upb_handlers_msgent *m) {
+void upb_decoder_jit_assignmsglabs2(upb_handlers *h, upb_msgent *m) {
upb_inttable_iter i;
for(i = upb_inttable_begin(&m->fieldtab); !upb_inttable_done(i);
i = upb_inttable_next(&m->fieldtab, i)) {
- upb_handlers_fieldent *f = upb_inttable_iter_value(i);
+ upb_fieldent *f = upb_inttable_iter_value(i);
if (upb_issubmsgtype(f->type)) {
- upb_handlers_msgent *sub_m = upb_handlers_getmsgent(h, f);
+ upb_msgent *sub_m = upb_handlers_getmsgent(h, f);
if (sub_m->jit_parent_field_done_pclabel == UPB_NONE) {
sub_m->jit_parent_field_done_pclabel = f->jit_submsg_done_pclabel;
} else {
@@ -621,7 +604,7 @@ void upb_decoder_makejit(upb_decoder *d) {
uint32_t pclabel_count = 1;
upb_handlers *h = d->dispatcher.handlers;
for (int i = 0; i < h->msgs_len; i++)
- upb_decoder_jit_assignmsglabs(h, &h->msgs[i], &pclabel_count);
+ upb_decoder_jit_assignmsglabs(&h->msgs[i], &pclabel_count);
for (int i = 0; i < h->msgs_len; i++)
upb_decoder_jit_assignmsglabs2(h, &h->msgs[i]);
@@ -648,9 +631,9 @@ void upb_decoder_makejit(upb_decoder *d) {
// Create dispatch tables.
for (int i = 0; i < h->msgs_len; i++) {
- upb_handlers_msgent *m = &h->msgs[i];
+ upb_msgent *m = &h->msgs[i];
for (uint32_t j = 0; j <= m->max_field_number; j++) {
- upb_handlers_fieldent *f = NULL;
+ upb_fieldent *f = NULL;
for (int k = 0; k < 8; k++) {
f = upb_inttable_lookup(&m->fieldtab, (j << 3) | k);
if (f) break;
@@ -662,9 +645,6 @@ void upb_decoder_makejit(upb_decoder *d) {
m->tablearray[j] = d->jit_code + dasm_getpclabel(d, 0);
}
}
- if (m->endgroup_f) {
- m->tablearray[m->endgroup_f->number] = d->jit_code + dasm_getpclabel(d, m->endgroup_f->jit_pclabel);
- }
}
dasm_free(d);
View
10 src/upb_def.h
@@ -117,12 +117,14 @@ struct _upb_fielddef {
INLINE bool upb_issubmsgtype(upb_fieldtype_t type) {
return type == UPB_TYPE(GROUP) || type == UPB_TYPE(MESSAGE);
}
-INLINE bool upb_issubmsg(upb_fielddef *f) {
- return upb_issubmsgtype(f->type);
+INLINE bool upb_isstringtype(upb_fieldtype_t type) {
+ return type == UPB_TYPE(STRING) || type == UPB_TYPE(BYTES);
}
-INLINE bool upb_isstring(upb_fielddef *f) {
- return f->type == UPB_TYPE(STRING) || f->type == UPB_TYPE(BYTES);
+INLINE bool upb_isprimitivetype(upb_fieldtype_t type) {
+ return !upb_issubmsgtype(type) && !upb_isstringtype(type);
}
+INLINE bool upb_issubmsg(upb_fielddef *f) { return upb_issubmsgtype(f->type); }
+INLINE bool upb_isstring(upb_fielddef *f) { return upb_isstringtype(f->type); }
INLINE bool upb_isarray(upb_fielddef *f) {
return f->label == UPB_LABEL(REPEATED);
}
View
9 src/upb_msg.c
@@ -207,7 +207,7 @@ static upb_flow_t upb_msg_dispatch(upb_msg *msg, upb_msgdef *md,
upb_dispatcher *d);
static upb_flow_t upb_msg_pushval(upb_value val, upb_fielddef *f,
- upb_dispatcher *d, upb_handlers_fieldent *hf) {
+ upb_dispatcher *d, upb_fieldent *hf) {
#define CHECK_FLOW(x) do { \
upb_flow_t flow = x; if (flow != UPB_CONTINUE) return flow; \
} while(0)
@@ -237,7 +237,7 @@ static upb_flow_t upb_msg_dispatch(upb_msg *msg, upb_msgdef *md,
for(i = upb_msg_begin(md); !upb_msg_done(i); i = upb_msg_next(md, i)) {
upb_fielddef *f = upb_msg_iter_field(i);
if (!upb_msg_has(msg, f)) continue;
- upb_handlers_fieldent *hf = upb_dispatcher_lookup(d, f->number);
+ upb_fieldent *hf = upb_dispatcher_lookup(d, f->number);
if (!hf) continue;
upb_value val = upb_msg_get(msg, f);
if (upb_isarray(f)) {
@@ -464,11 +464,12 @@ upb_sflow_t upb_msgsink_startsubmsg_r(void *_m, upb_value _fval) {
void upb_msg_regdhandlers(upb_handlers *h) {
upb_register_all(h, NULL, NULL, NULL, NULL, NULL, NULL);
for (int i = 0; i < h->msgs_len; i++) {
- upb_handlers_msgent *m = &h->msgs[i];
+ upb_msgent *m = &h->msgs[i];
upb_inttable_iter iter = upb_inttable_begin(&m->fieldtab);
for(; !upb_inttable_done(iter);
iter = upb_inttable_next(&m->fieldtab, iter)) {
- upb_handlers_fieldent *fe = upb_inttable_iter_value(iter);
+ upb_fieldent *fe = upb_inttable_iter_value(iter);
+ if (fe->type == UPB_TYPE_ENDGROUP) continue;
upb_fielddef *f = upb_value_getfielddef(fe->fval);
uint16_t msg_size = 0;
uint8_t set_flags_bytes = 0;
View
49 src/upb_stream.c
@@ -47,13 +47,13 @@ upb_flow_t upb_unknownval_nop(void *closure, upb_field_number_t fieldnum,
return UPB_CONTINUE;
}
-static void upb_msgent_init(upb_handlers_msgent *e) {
- upb_inttable_init(&e->fieldtab, 8, sizeof(upb_handlers_fieldent));
+static void upb_msgent_init(upb_msgent *e) {
+ upb_inttable_init(&e->fieldtab, 8, sizeof(upb_fieldent));
e->startmsg = &upb_startmsg_nop;
e->endmsg = &upb_endmsg_nop;
e->unknownval = &upb_unknownval_nop;
- e->endgroup_f = NULL;
e->tablearray = NULL;
+ e->is_group = false;
}
void upb_handlers_init(upb_handlers *h, upb_msgdef *md) {
@@ -76,20 +76,19 @@ void upb_handlers_uninit(upb_handlers *h) {
for (int i = 0; i < h->msgs_len; i++) {
upb_inttable_free(&h->msgs[i].fieldtab);
free(h->msgs[i].tablearray);
- free(h->msgs[i].endgroup_f);
}
free(h->msgs);
upb_msgdef_unref(h->toplevel_msgdef);
}
-static upb_handlers_fieldent *upb_handlers_getorcreate_without_fval(
+static upb_fieldent *upb_handlers_getorcreate_without_fval(
upb_handlers *h, upb_field_number_t fieldnum, upb_fieldtype_t type, bool repeated) {
uint32_t tag = fieldnum << 3 | upb_types[type].native_wire_type;
- upb_handlers_fieldent *f =
- upb_inttable_lookup(&h->msgent->fieldtab, tag);
+ upb_fieldent *f = upb_inttable_lookup(&h->msgent->fieldtab, tag);
if (!f) {
- upb_handlers_fieldent new_f = {false, type, repeated, fieldnum, -1, UPB_NO_VALUE,
- {&upb_value_nop}, &upb_endsubmsg_nop, 0, 0, 0};
+ upb_fieldent new_f = {false, type, repeated,
+ repeated && upb_isprimitivetype(type), fieldnum, -1, UPB_NO_VALUE,
+ {&upb_value_nop}, &upb_endsubmsg_nop, 0, 0, 0, NULL};
if (upb_issubmsgtype(type)) new_f.cb.startsubmsg = &upb_startsubmsg_nop;
upb_inttable_insert(&h->msgent->fieldtab, tag, &new_f);
@@ -100,10 +99,10 @@ static upb_handlers_fieldent *upb_handlers_getorcreate_without_fval(
return f;
}
-static upb_handlers_fieldent *upb_handlers_getorcreate(
+static upb_fieldent *upb_handlers_getorcreate(
upb_handlers *h, upb_field_number_t fieldnum,
upb_fieldtype_t type, bool repeated, upb_value fval) {
- upb_handlers_fieldent *f =
+ upb_fieldent *f =
upb_handlers_getorcreate_without_fval(h, fieldnum, type, repeated);
f->fval = fval;
return f;
@@ -164,7 +163,7 @@ void upb_register_typed_submsg(upb_handlers *h, upb_field_number_t fieldnum,
upb_startsubmsg_handler_t start,
upb_endsubmsg_handler_t end,
upb_value fval) {
- upb_handlers_fieldent *f = upb_handlers_getorcreate(h, fieldnum, type, repeated, fval);
+ upb_fieldent *f = upb_handlers_getorcreate(h, fieldnum, type, repeated, fval);
f->cb.startsubmsg = start ? start : &upb_startsubmsg_nop;
f->endsubmsg = end ? end : &upb_endsubmsg_nop;
}
@@ -172,14 +171,14 @@ void upb_register_typed_submsg(upb_handlers *h, upb_field_number_t fieldnum,
void upb_handlers_typed_link(upb_handlers *h, upb_field_number_t fieldnum,
upb_fieldtype_t type, bool repeated, int frames) {
assert(frames <= (h->top - h->stack));
- upb_handlers_fieldent *f =
+ upb_fieldent *f =
upb_handlers_getorcreate_without_fval(h, fieldnum, type, repeated);
f->msgent_index = (h->top - frames)->msgent_index;
}
void upb_handlers_typed_push(upb_handlers *h, upb_field_number_t fieldnum,
upb_fieldtype_t type, bool repeated) {
- upb_handlers_fieldent *f =
+ upb_fieldent *f =
upb_handlers_getorcreate_without_fval(h, fieldnum, type, repeated);
if (h->top == h->limit) abort(); // TODO: make growable.
++h->top;
@@ -201,6 +200,15 @@ void upb_handlers_typed_push(upb_handlers *h, upb_field_number_t fieldnum,
assert(f);
h->top->msgdef = upb_downcast_msgdef(f->def);
}
+ if (type == UPB_TYPE(GROUP)) {
+ // Insert a fieldent for ENDGROUP so we can easily dispatch endgroup when
+ // we see it in the submessage.
+ // TODO: assert that no other fields in the group are registered with the
+ // same name or number.
+ upb_register_typed_submsg(h, fieldnum, UPB_TYPE_ENDGROUP, false, NULL, NULL,
+ UPB_NO_VALUE);
+ h->msgent->is_group = true;
+ }
}
void upb_handlers_push(upb_handlers *h, upb_fielddef *f,
@@ -226,15 +234,15 @@ void upb_handlers_pop(upb_handlers *h, upb_fielddef *f) {
/* upb_dispatcher *************************************************************/
-static upb_handlers_fieldent toplevel_f = {
- false, UPB_TYPE(GROUP), false, 0,
+static upb_fieldent toplevel_f = {
+ false, UPB_TYPE(GROUP), false, false, 0,
0, // msgent_index
#ifdef NDEBUG
{{0}},
#else
{{0}, UPB_VALUETYPE_RAW},
#endif
- {NULL}, NULL, 0, 0, 0};
+ {NULL}, NULL, 0, 0, 0, NULL};
void upb_dispatcher_init(upb_dispatcher *d, upb_handlers *h) {
d->handlers = h;
@@ -255,6 +263,7 @@ void upb_dispatcher_reset(upb_dispatcher *d, void *top_closure, uint32_t top_end
d->top = d->stack;
d->top->closure = top_closure;
d->top->end_offset = top_end_offset;
+ d->top->is_packed = false;
}
void upb_dispatcher_uninit(upb_dispatcher *d) {
@@ -285,8 +294,7 @@ void upb_dispatch_endmsg(upb_dispatcher *d, upb_status *status) {
upb_copyerr(status, &d->status);
}
-upb_flow_t upb_dispatch_startsubmsg(upb_dispatcher *d,
- upb_dispatcher_field *f,
+upb_flow_t upb_dispatch_startsubmsg(upb_dispatcher *d, upb_fieldent *f,
size_t userval) {
++d->current_depth;
if (upb_dispatcher_skipping(d)) return UPB_SKIPSUBMSG;
@@ -308,6 +316,7 @@ upb_flow_t upb_dispatch_startsubmsg(upb_dispatcher *d,
d->top->f = f;
d->top->end_offset = userval;
d->top->closure = sflow.closure;
+ d->top->is_packed = false;
d->msgent = upb_handlers_getmsgent(d->handlers, f);
d->dispatch_table = &d->msgent->fieldtab;
return upb_dispatch_startmsg(d);
@@ -319,7 +328,7 @@ upb_flow_t upb_dispatch_endsubmsg(upb_dispatcher *d) {
flow = UPB_SKIPSUBMSG;
} else {
assert(d->top > d->stack);
- upb_dispatcher_field *old_f = d->top->f;
+ upb_fieldent *old_f = d->top->f;
d->msgent->endmsg(d->top->closure, &d->status);
--d->top;
d->msgent = upb_handlers_getmsgent(d->handlers, d->top->f);
View
46 src/upb_stream.h
@@ -88,11 +88,12 @@ upb_sflow_t upb_startsubmsg_nop(void *closure, upb_value fval);
upb_flow_t upb_endsubmsg_nop(void *closure, upb_value fval);
upb_flow_t upb_unknownval_nop(void *closure, upb_field_number_t fieldnum,
upb_value val);
-
-typedef struct {
+struct _upb_decoder;
+typedef struct _upb_fieldent {
bool junk;
upb_fieldtype_t type;
bool repeated;
+ bool is_repeated_primitive;
uint32_t number;
// For upb_issubmsg(f) only, the index into the msgdef array of the submsg.
// -1 if unset (indicates that submsg should be skipped).
@@ -106,23 +107,26 @@ typedef struct {
uint32_t jit_pclabel;
uint32_t jit_pclabel_notypecheck;
uint32_t jit_submsg_done_pclabel;
-} upb_handlers_fieldent;
+ void (*decode)(struct _upb_decoder *d, struct _upb_fieldent *f);
+} upb_fieldent;
-typedef struct _upb_handlers_msgent {
+typedef struct _upb_msgent {
upb_startmsg_handler_t startmsg;
upb_endmsg_handler_t endmsg;
upb_unknownval_handler_t unknownval;
- // Maps field number -> upb_handlers_fieldent.
+ // Maps field number -> upb_fieldent.
upb_inttable fieldtab;
uint32_t jit_startmsg_pclabel;
uint32_t jit_endofbuf_pclabel;
uint32_t jit_endofmsg_pclabel;
uint32_t jit_unknownfield_pclabel;
- upb_handlers_fieldent *endgroup_f; // NULL if not a group.
+ bool is_group;
int32_t jit_parent_field_done_pclabel;
uint32_t max_field_number;
+ // Currently keyed on field number. Could also try keying it
+ // on encoded or decoded tag, or on encoded field number.
void **tablearray;
-} upb_handlers_msgent;
+} upb_msgent;
typedef struct {
upb_msgdef *msgdef;
@@ -131,10 +135,10 @@ typedef struct {
struct _upb_handlers {
// Array of msgdefs, [0]=toplevel.
- upb_handlers_msgent *msgs;
+ upb_msgent *msgs;
int msgs_len, msgs_size;
upb_msgdef *toplevel_msgdef; // We own a ref.
- upb_handlers_msgent *msgent;
+ upb_msgent *msgent;
upb_handlers_frame stack[UPB_MAX_TYPE_DEPTH], *top, *limit;
bool should_jit;
};
@@ -272,12 +276,11 @@ void upb_handlers_typed_push(upb_handlers *h, upb_field_number_t fieldnum,
upb_fieldtype_t type, bool repeated);
void upb_handlers_typed_pop(upb_handlers *h);
-INLINE upb_handlers_msgent *upb_handlers_getmsgent(upb_handlers *h,
- upb_handlers_fieldent *f) {
+INLINE upb_msgent *upb_handlers_getmsgent(upb_handlers *h, upb_fieldent *f) {
assert(f->msgent_index != -1);
return &h->msgs[f->msgent_index];
}
-upb_handlers_fieldent *upb_handlers_lookup(upb_inttable *dispatch_table, upb_field_number_t fieldnum);
+upb_fieldent *upb_handlers_lookup(upb_inttable *dispatch_table, upb_field_number_t fieldnum);
/* upb_dispatcher *************************************************************/
@@ -298,11 +301,12 @@ upb_handlers_fieldent *upb_handlers_lookup(upb_inttable *dispatch_table, upb_fie
// consumed, like if this is a submessage of a larger stream.
typedef struct {
- upb_handlers_fieldent *f;
+ upb_fieldent *f;
void *closure;
// Relative to the beginning of this buffer.
// For groups and the top-level: UINT32_MAX.
uint32_t end_offset;
+ bool is_packed; // == !upb_issubmsg(f) && end_offset != UPB_REPEATEDEND
} upb_dispatcher_frame;
typedef struct {
@@ -311,7 +315,7 @@ typedef struct {
upb_handlers *handlers;
// Msg and dispatch table for the current level.
- upb_handlers_msgent *msgent;
+ upb_msgent *msgent;
upb_inttable *dispatch_table;
// The number of startsubmsg calls without a corresponding endsubmsg call.
@@ -342,8 +346,6 @@ INLINE bool upb_dispatcher_noframe(upb_dispatcher *d) {
}
-typedef upb_handlers_fieldent upb_dispatcher_field;
-
void upb_dispatcher_init(upb_dispatcher *d, upb_handlers *h);
void upb_dispatcher_reset(upb_dispatcher *d, void *top_closure, uint32_t top_end_offset);
void upb_dispatcher_uninit(upb_dispatcher *d);
@@ -352,20 +354,20 @@ upb_flow_t upb_dispatch_startmsg(upb_dispatcher *d);
void upb_dispatch_endmsg(upb_dispatcher *d, upb_status *status);
// Looks up a field by number for the current message.
-INLINE upb_dispatcher_field *upb_dispatcher_lookup(upb_dispatcher *d,
- upb_field_number_t n) {
- return (upb_dispatcher_field*)upb_inttable_fastlookup(
- d->dispatch_table, n, sizeof(upb_dispatcher_field));
+INLINE upb_fieldent *upb_dispatcher_lookup(upb_dispatcher *d,
+ upb_field_number_t n) {
+ return (upb_fieldent*)upb_inttable_fastlookup(
+ d->dispatch_table, n, sizeof(upb_fieldent));
}
// Dispatches values or submessages -- the client is responsible for having
// previously looked up the field.
upb_flow_t upb_dispatch_startsubmsg(upb_dispatcher *d,
- upb_dispatcher_field *f,
+ upb_fieldent *f,
size_t userval);
upb_flow_t upb_dispatch_endsubmsg(upb_dispatcher *d);
-INLINE upb_flow_t upb_dispatch_value(upb_dispatcher *d, upb_dispatcher_field *f,
+INLINE upb_flow_t upb_dispatch_value(upb_dispatcher *d, upb_fieldent *f,
upb_value val) {
if (upb_dispatcher_skipping(d)) return UPB_SKIPSUBMSG;
upb_flow_t flow = f->cb.value(d->top->closure, f->fval, val);
View
5 src/upb_string.h
@@ -182,6 +182,11 @@ INLINE void upb_string_recycle(upb_string **_str) {
str->len = 0;
_upb_string_release(str);
} else {
+ //if (!str) {
+ // printf("!str\n");
+ //}
+ //else if (upb_atomic_read(&str->refcount) != 1) { printf("refcount: %d\n", upb_atomic_read(&str->refcount)); }
+ //else { printf("Some other reason.\n"); }
upb_string_unref(str);
*_str = upb_string_new();
}
View
9 src/upb_textprinter.c
@@ -7,9 +7,10 @@
#include "upb_textprinter.h"
+#include <ctype.h>
+#include <float.h>
#include <inttypes.h>
#include <stdlib.h>
-#include <ctype.h>
struct _upb_textprinter {
upb_bytesink *bytesink;
@@ -99,10 +100,12 @@ static upb_flow_t upb_textprinter_value(void *_p, upb_value fval,
#define CASE(fmtstr, member) \
CHECK(upb_bytesink_printf(p->bytesink, &p->status, fmtstr, upb_value_get ## member(val))); break;
switch(f->type) {
+ // TODO: figure out what we should really be doing for these
+ // floating-point formats.
case UPB_TYPE(DOUBLE):
- CASE("%0.f", double);
+ CHECK(upb_bytesink_printf(p->bytesink, &p->status, "%.*g", DBL_DIG, upb_value_getdouble(val))); break;
case UPB_TYPE(FLOAT):
- CASE("%0.f", float)
+ CHECK(upb_bytesink_printf(p->bytesink, &p->status, "%.*g", FLT_DIG+2, upb_value_getfloat(val))); break;
case UPB_TYPE(INT64):
case UPB_TYPE(SFIXED64):
case UPB_TYPE(SINT64):
View
54 src/upb_varint.c
@@ -0,0 +1,54 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2011 Google Inc. See LICENSE for details.
+ * Author: Josh Haberman <jhaberman@gmail.com>
+ */
+
+#include "upb_varint.h"
+
+// Given an encoded varint v, returns an integer with a single bit set that
+// indicates the end of the varint. Subtracting one from this value will
+// yield a mask that leaves only bits that are part of the varint. Returns
+// 0 if the varint is unterminated.
+INLINE uint64_t upb_get_vstopbit(uint64_t v) {
+ uint64_t cbits = v | 0x7f7f7f7f7f7f7f7fULL;
+ return ~cbits & (cbits+1);
+}
+INLINE uint64_t upb_get_vmask(uint64_t v) { return upb_get_vstopbit(v) - 1; }
+
+upb_decoderet upb_vdecode_max8_massimino(upb_decoderet r) {
+ uint64_t b;
+ memcpy(&b, r.p, sizeof(b));
+ uint64_t stop_bit = upb_get_vstopbit(b);
+ b = (b & 0x7f7f7f7f7f7f7f7fULL) & (stop_bit - 1);
+ b += b & 0x007f007f007f007fULL;
+ b += 3 * (b & 0x0000ffff0000ffffULL);
+ b += 15 * (b & 0x00000000ffffffffULL);
+ if (stop_bit == 0) {
+ // Error: unterminated varint.
+ upb_decoderet err_r = {(void*)0, 0};
+ return err_r;
+ }
+ upb_decoderet my_r = {r.p + ((__builtin_ctzll(stop_bit) + 1) / 8),
+ r.val | (b << 7)};
+ return my_r;
+}
+
+upb_decoderet upb_vdecode_max8_wright(upb_decoderet r) {
+ uint64_t b;
+ memcpy(&b, r.p, sizeof(b));
+ uint64_t stop_bit = upb_get_vstopbit(b);
+ b &= (stop_bit - 1);
+ b = ((b & 0x7f007f007f007f00) >> 1) | (b & 0x007f007f007f007f);
+ b = ((b & 0xffff0000ffff0000) >> 2) | (b & 0x0000ffff0000ffff);
+ b = ((b & 0xffffffff00000000) >> 4) | (b & 0x00000000ffffffff);
+ if (stop_bit == 0) {
+ // Error: unterminated varint.
+ upb_decoderet err_r = {(void*)0, 0};
+ return err_r;
+ }
+ upb_decoderet my_r = {r.p + ((__builtin_ctzll(stop_bit) + 1) / 8),
+ r.val | (b << 14)};
+ return my_r;
+}
View
50 src/upb_varint.h
@@ -75,53 +75,11 @@ INLINE upb_decoderet upb_vdecode_branch64(const char *p) {
return r;
}
-// Given an encoded varint v, returns an integer with a single bit set that
-// indicates the end of the varint. Subtracting one from this value will
-// yield a mask that leaves only bits that are part of the varint. Returns
-// 0 if the varint is unterminated.
-INLINE uint64_t upb_get_vstopbit(uint64_t v) {
- uint64_t cbits = v | 0x7f7f7f7f7f7f7f7fULL;
- return ~cbits & (cbits+1);
-}
-INLINE uint64_t upb_get_vmask(uint64_t v) { return upb_get_vstopbit(v) - 1; }
-
// Decodes a varint of at most 8 bytes without branching (except for error).
-INLINE upb_decoderet upb_vdecode_max8_wright(upb_decoderet r) {
- uint64_t b;
- memcpy(&b, r.p, sizeof(b));
- uint64_t stop_bit = upb_get_vstopbit(b);
- b &= (stop_bit - 1);
- b = ((b & 0x7f007f007f007f00) >> 1) | (b & 0x007f007f007f007f);
- b = ((b & 0xffff0000ffff0000) >> 2) | (b & 0x0000ffff0000ffff);
- b = ((b & 0xffffffff00000000) >> 4) | (b & 0x00000000ffffffff);
- if (stop_bit == 0) {
- // Error: unterminated varint.
- upb_decoderet err_r = {(void*)0, 0};
- return err_r;
- }
- upb_decoderet my_r = {r.p + ((__builtin_ctzll(stop_bit) + 1) / 8),
- r.val | (b << 14)};
- return my_r;
-}
+upb_decoderet upb_vdecode_max8_wright(upb_decoderet r);
// Another implementation of the previous.
-INLINE upb_decoderet upb_vdecode_max8_massimino(upb_decoderet r) {
- uint64_t b;
- memcpy(&b, r.p, sizeof(b));
- uint64_t stop_bit = upb_get_vstopbit(b);
- b = (b & 0x7f7f7f7f7f7f7f7fULL) & (stop_bit - 1);
- b += b & 0x007f007f007f007fULL;
- b += 3 * (b & 0x0000ffff0000ffffULL);
- b += 15 * (b & 0x00000000ffffffffULL);
- if (stop_bit == 0) {
- // Error: unterminated varint.
- upb_decoderet err_r = {(void*)0, 0};
- return err_r;
- }
- upb_decoderet my_r = {r.p + ((__builtin_ctzll(stop_bit) + 1) / 8),
- r.val | (b << 7)};
- return my_r;
-}
+upb_decoderet upb_vdecode_max8_massimino(upb_decoderet r);
// Template for a function that checks the first two bytes with branching
// and dispatches 2-10 bytes with a separate function.
@@ -169,8 +127,8 @@ INLINE size_t upb_value_size(uint64_t val) {
return val == 0 ? 1 : high_bit / 8 + 1;
}
-// Currently only works with 32-bit varints.
-INLINE uint64_t upb_vencode(uint32_t val) {
+// Encodes a 32-bit varint, *not* sign-extended.
+INLINE uint64_t upb_vencode32(uint32_t val) {
uint64_t ret = 0;
for (int bitpos = 0; val; bitpos+=8, val >>=7) {
if (bitpos > 0) ret |= (1 << (bitpos-1));
View
67 tests/test_decoder.c
@@ -2,38 +2,73 @@
#include "upb_decoder.h"
#include "upb_textprinter.h"
#include "upb_stdio.h"
+#include "upb_glue.h"
+
+int main(int argc, char *argv[]) {
+ if (argc < 3) {
+ fprintf(stderr, "Usage: test_decoder <descfile> <msgname>\n");
+ return 1;
+ }
-int main() {
upb_symtab *symtab = upb_symtab_new();
- upb_symtab_add_descriptorproto(symtab);
- upb_def *fds = upb_symtab_lookup(
- symtab, UPB_STRLIT("google.protobuf.FileDescriptorSet"));
+ upb_string *desc = upb_strreadfile(argv[1]);
+ if (!desc) {
+ fprintf(stderr, "Couldn't open descriptor file: %s\n", argv[1]);
+ return 1;
+ }
+
+ upb_status status = UPB_STATUS_INIT;
+ upb_parsedesc(symtab, desc, &status);
+ if (!upb_ok(&status)) {
+ fprintf(stderr, "Error parsing descriptor: ");
+ upb_printerr(&status);
+ return 1;
+ }
+ upb_string_unref(desc);
+
+ upb_string *name = upb_strdupc(argv[2]);
+ upb_def *md = upb_symtab_lookup(symtab, name);
+ upb_string_unref(name);
+ if (!md) {
+ fprintf(stderr, "Descriptor did not contain message: %s\n", argv[2]);
+ return 1;
+ }
+
+ upb_msgdef *m = upb_dyncast_msgdef(md);
+ if (!m) {
+ fprintf(stderr, "Def was not a msgdef.\n");
+ return 1;
+ }
upb_stdio *in = upb_stdio_new();
upb_stdio_reset(in, stdin);
upb_stdio *out = upb_stdio_new();
upb_stdio_reset(out, stdout);
- upb_decoder d;
- upb_decoder_init(&d, upb_downcast_msgdef(fds));
- upb_decoder_reset(&d, upb_stdio_bytesrc(in));
- upb_textprinter *p = upb_textprinter_new();
+
upb_handlers handlers;
- upb_handlers_init(&handlers);
- upb_textprinter_reset(p, &handlers, upb_stdio_bytesink(out), false);
- upb_src *src = upb_decoder_src(&d);
- upb_src_sethandlers(src, &handlers);
+ upb_handlers_init(&handlers, m);
+ upb_textprinter *p = upb_textprinter_new();
+ upb_textprinter_reset(p, upb_stdio_bytesink(out), false);
+ upb_textprinter_reghandlers(&handlers);
- upb_status status = UPB_STATUS_INIT;
- upb_src_run(src, &status);
+ upb_decoder d;
+ upb_decoder_init(&d, &handlers);
+ upb_decoder_reset(&d, upb_stdio_bytesrc(in), p);
+
+ upb_clearerr(&status);
+ upb_decoder_decode(&d, &status);
- assert(upb_ok(&status));
+ if (!upb_ok(&status)) {
+ fprintf(stderr, "Error parsing input: ");
+ upb_printerr(&status);
+ }
upb_status_uninit(&status);
upb_stdio_free(in);
upb_stdio_free(out);
upb_decoder_uninit(&d);
upb_textprinter_free(p);
- upb_def_unref(fds);
+ upb_def_unref(UPB_UPCAST(m));
upb_symtab_unref(symtab);
// Prevent C library from holding buffers open, so Valgrind doesn't see
Please sign in to comment.
Something went wrong with that request. Please try again.