Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Add buffer verification functionality to FlatBuffers
Bug: 15732628
Change-Id: I0b7cb65982d6b8957d5a899cca7d2b5d2ef53206
Tested: On Windows, OS X and Linux
  • Loading branch information
Wouter van Oortmerssen committed Jul 2, 2014
1 parent 5904311 commit a0b6ffc
Show file tree
Hide file tree
Showing 8 changed files with 343 additions and 25 deletions.
37 changes: 35 additions & 2 deletions docs/source/CppUsage.md
Expand Up @@ -47,7 +47,7 @@ we can construct them in a familiar way.
We have now serialized the non-scalar components of of the monster
example, so we could create the monster something like this:

auto mloc = CreateMonster(fbb, &vec, 150, 80, name, inventory, Color_Red, Offset<void>(0), Any_NONE);
auto mloc = CreateMonster(fbb, &vec, 150, 80, name, inventory, Color_Red, 0, Any_NONE);

Note that we're passing `150` for the `mana` field, which happens to be the
default value: this means the field will not actually be written to the buffer,
Expand All @@ -58,7 +58,8 @@ since they won't bloat up the buffer sizes if they're not actually used.

We do something similarly for the union field `test` by specifying a `0` offset
and the `NONE` enum value (part of every union) to indicate we don't actually
want to write this field.
want to write this field. You can use `0` also as a default for other
non-scalar types, such as strings, vectors and tables.

Tables (like `Monster`) give you full flexibility on what fields you write
(unlike `Vec3`, which always has all fields set because it is a `struct`).
Expand Down Expand Up @@ -155,6 +156,38 @@ machines, so only use tricks like this if you can guarantee you're not
shipping on a big endian machine (an `assert(FLATBUFFERS_LITTLEENDIAN)`
would be wise).

### Access of untrusted buffers

The generated accessor functions access fields over offsets, which is
very quick. These offsets are not verified at run-time, so a malformed
buffer could cause a program to crash by accessing random memory.

When you're processing large amounts of data from a source you know (e.g.
your own generated data on disk), this is acceptable, but when reading
data from the network that can potentially have been modified by an
attacker, this is undesirable.

For this reason, you can optionally use a buffer verifier before you
access the data. This verifier will check all offsets, all sizes of
fields, and null termination of strings to ensure that when a buffer
is accessed, all reads will end up inside the buffer.

Each root type will have a verification function generated for it,
e.g. for `Monster`, you can call:

bool ok = VerifyMonsterBuffer(Verifier(buf, len));

if `ok` is true, the buffer is safe to read.

Besides untrusted data, this function may be useful to call in debug
mode, as extra insurance against data being corrupted somewhere along
the way.

While verifying a buffer isn't "free", it is typically faster than
a full traversal (since any scalar data is not actually touched),
and since it may cause the buffer to be brought into cache before
reading, the actual overhead may be even lower than expected.

## Text & schema parsing

Using binary buffers with the generated header provides a super low
Expand Down
130 changes: 117 additions & 13 deletions include/flatbuffers/flatbuffers.h
Expand Up @@ -76,7 +76,7 @@ typedef uintmax_t largest_scalar_t;
template<typename T> struct Offset {
uoffset_t o;
Offset() : o(0) {}
explicit Offset(uoffset_t _o) : o(_o) {}
Offset(uoffset_t _o) : o(_o) {}
Offset<void> Union() const { return Offset<void>(o); }
};

Expand Down Expand Up @@ -407,13 +407,13 @@ class FlatBufferBuilder {
buf_.fill(numfields * sizeof(voffset_t));
auto table_object_size = vtableoffsetloc - start;
assert(table_object_size < 0x10000); // Vtable use 16bit offsets.
PushElement<voffset_t>(table_object_size);
PushElement<voffset_t>(static_cast<voffset_t>(table_object_size));
PushElement<voffset_t>(FieldIndexToOffset(numfields));
// Write the offsets into the table
for (auto field_location = offsetbuf_.begin();
field_location != offsetbuf_.end();
++field_location) {
auto pos = (vtableoffsetloc - field_location->off);
auto pos = static_cast<voffset_t>(vtableoffsetloc - field_location->off);
// If this asserts, it means you've set a field twice.
assert(!ReadScalar<voffset_t>(buf_.data() + field_location->id));
WriteScalar<voffset_t>(buf_.data() + field_location->id, pos);
Expand Down Expand Up @@ -563,7 +563,90 @@ template<typename T> const T *GetRoot(const void *buf) {
EndianScalar(*reinterpret_cast<const uoffset_t *>(buf)));
}

// "structs_" are flat structures that do not have an offset table, thus
// Helper class to verify the integrity of a FlatBuffer
class Verifier {
public:
Verifier(const uint8_t *buf, size_t buf_len)
: buf_(buf), end_(buf + buf_len)
{}

// Verify any range within the buffer.
bool Verify(const void *elem, size_t elem_len) const {
bool ok = elem >= buf_ && elem <= end_ - elem_len;
assert(ok);
return ok;
}

// Verify a range indicated by sizeof(T).
template<typename T> bool Verify(const void *elem) const {
return Verify(elem, sizeof(T));
}

// Verify a pointer (may be NULL) of any vector type.
template<typename T> bool Verify(const Vector<T> *vec) const {
const uint8_t *end;
return !vec ||
VerifyVector(reinterpret_cast<const uint8_t *>(vec), sizeof(T),
&end);
}

// Verify a pointer (may be NULL) to string.
bool Verify(const String *str) const {
const uint8_t *end;
return !str ||
(VerifyVector(reinterpret_cast<const uint8_t *>(str), 1, &end) &&
Verify(end, 1) && // Must have terminator
*end == '\0'); // Terminating byte must be 0.
}

// Common code between vectors and strings.
bool VerifyVector(const uint8_t *vec, size_t elem_size,
const uint8_t **end) const {
// Check we can read the size field.
if (!Verify<uoffset_t>(vec)) return false;
// Check the whole array. If this is a string, the byte past the array
// must be 0.
auto size = ReadScalar<uoffset_t>(vec);
auto byte_size = sizeof(size) + elem_size * size;
*end = vec + byte_size;
return Verify(vec, byte_size);
}

// Special case for string contents, after the above has been called.
bool VerifyVectorOfStrings(const Vector<Offset<String>> *vec) const {
if (vec) {
for (uoffset_t i = 0; i < vec->Length(); i++) {
if (!Verify(vec->Get(i))) return false;
}
}
return true;
}

// Special case for table contents, after the above has been called.
template<typename T> bool VerifyVectorOfTables(const Vector<Offset<T>> *vec)
const {
if (vec) {
for (uoffset_t i = 0; i < vec->Length(); i++) {
if (!vec->Get(i)->Verify(*this)) return false;
}
}
return true;
}

// Verify this whole buffer, starting with root type T.
template<typename T> bool VerifyBuffer() const {
// Call T::Verify, which must be in the generated code for this type.
return Verify<uoffset_t>(buf_) &&
reinterpret_cast<const T *>(buf_ + ReadScalar<uoffset_t>(buf_))->
Verify(*this);
}

private:
const uint8_t *buf_;
const uint8_t *end_;
};

// "structs" are flat structures that do not have an offset table, thus
// always have all members present and do not support forwards/backwards
// compatible extensions.

Expand Down Expand Up @@ -594,7 +677,7 @@ class Table {
// if the field was not present.
voffset_t GetOptionalFieldOffset(voffset_t field) const {
// The vtable offset is always at the start.
auto vtable = &data_ - ReadScalar<soffset_t>(&data_);
auto vtable = data_ - ReadScalar<soffset_t>(data_);
// The first element is the size of the vtable (fields + type id + itself).
auto vtsize = ReadScalar<voffset_t>(vtable);
// If the field we're accessing is outside the vtable, we're reading older
Expand All @@ -604,20 +687,20 @@ class Table {

template<typename T> T GetField(voffset_t field, T defaultval) const {
auto field_offset = GetOptionalFieldOffset(field);
return field_offset ? ReadScalar<T>(&data_[field_offset]) : defaultval;
return field_offset ? ReadScalar<T>(data_ + field_offset) : defaultval;
}

template<typename P> P GetPointer(voffset_t field) const {
auto field_offset = GetOptionalFieldOffset(field);
auto p = &data_[field_offset];
auto p = data_ + field_offset;
return field_offset
? reinterpret_cast<P>(p + ReadScalar<uoffset_t>(p))
: nullptr;
}

template<typename P> P GetStruct(voffset_t field) const {
auto field_offset = GetOptionalFieldOffset(field);
return field_offset ? reinterpret_cast<P>(&data_[field_offset]) : nullptr;
return field_offset ? reinterpret_cast<P>(data_ + field_offset) : nullptr;
}

template<typename T> void SetField(voffset_t field, T val) {
Expand All @@ -626,29 +709,50 @@ class Table {
// (or should we return a bool instead?).
// check if it exists first using CheckField()
assert(field_offset);
WriteScalar(&data_[field_offset], val);
WriteScalar(data_ + field_offset, val);
}

bool CheckField(voffset_t field) const {
return GetOptionalFieldOffset(field) != 0;
}

// Verify the vtable of this table.
// Call this once per table, followed by VerifyField once per field.
bool VerifyTable(const Verifier &verifier) const {
// Check the vtable offset.
if (!verifier.Verify<soffset_t>(data_)) return false;
auto vtable = data_ - ReadScalar<soffset_t>(data_);
// Check the vtable size field, then check vtable fits in its entirety.
return verifier.Verify<voffset_t>(vtable) &&
verifier.Verify(vtable, ReadScalar<voffset_t>(vtable));
}

// Verify a particular field.
template<typename T> bool VerifyField(const Verifier &verifier,
voffset_t field) const {
// Calling GetOptionalFieldOffset should be safe now thanks to
// VerifyTable().
auto field_offset = GetOptionalFieldOffset(field);
// Check the actual field.
return !field_offset || verifier.Verify<T>(data_ + field_offset);
}

private:
// private constructor & copy constructor: you obtain instances of this
// class by pointing to existing data only
Table() {};
Table(const Table &other) {};
Table();
Table(const Table &other);

uint8_t data_[1];
};

// Utility function for reverse lookups on the EnumNames*() functions
// (in the generated C++ code)
// names must be NULL terminated.
inline size_t LookupEnum(const char **names, const char *name) {
inline int LookupEnum(const char **names, const char *name) {
for (const char **p = names; *p; p++)
if (!strcmp(*p, name))
return p - names;
return static_cast<int>(p - names);
return -1;
}

Expand Down

0 comments on commit a0b6ffc

Please sign in to comment.