Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions cpp/fory/serialization/context.cc
Original file line number Diff line number Diff line change
Expand Up @@ -506,8 +506,8 @@ Result<const TypeInfo *, Error> ReadContext::read_type_meta() {
// Check if we already parsed this type meta (cache lookup by header)
if (has_last_meta_header_ && meta_header == last_meta_header_) {
// Header-cache hits intentionally skip without rehashing. Entries reach
// this cache only after a successful TypeMeta parse and 52-bit
// metadata-hash validation.
const TypeInfo *cached = last_meta_type_info_;
reading_type_infos_.push_back(cached);
FORY_RETURN_NOT_OK(
Expand All @@ -518,8 +518,8 @@ Result<const TypeInfo *, Error> ReadContext::read_type_meta() {
auto *cache_entry = parsed_type_infos_.find(meta_header);
if (cache_entry != nullptr) {
// Header-cache hits intentionally skip without rehashing. Entries reach
// this cache only after a successful TypeMeta parse and 52-bit
// metadata-hash validation.
const TypeInfo *cached = cache_entry->second;
reading_type_infos_.push_back(cached);
has_last_meta_header_ = true;
Expand Down
55 changes: 52 additions & 3 deletions cpp/fory/serialization/serialization_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,27 @@ namespace test {
namespace {

uint64_t compute_type_meta_hash_bits_for_test(const uint8_t *meta_bytes,
size_t meta_size) {
size_t meta_size,
uint64_t header_low_bits) {
constexpr uint32_t kHashShift = 12;
constexpr uint64_t kHashBitsMask = UINT64_MAX << kHashShift;
std::vector<uint8_t> hash_input(meta_size + 2);
std::memcpy(hash_input.data(), meta_bytes, meta_size);
hash_input[meta_size] = static_cast<uint8_t>(header_low_bits);
hash_input[meta_size + 1] = static_cast<uint8_t>(header_low_bits >> 8);
int64_t hash_out[2] = {0, 0};
MurmurHash3_x64_128(hash_input.data(), static_cast<int>(hash_input.size()),
47, hash_out);
uint64_t shifted = static_cast<uint64_t>(hash_out[0]) << kHashShift;
if (static_cast<int64_t>(shifted) < 0) {
shifted = ~shifted + 1;
}
return shifted & kHashBitsMask;
}

uint64_t
compute_body_only_type_meta_hash_bits_for_test(const uint8_t *meta_bytes,
size_t meta_size) {
constexpr uint32_t kHashShift = 12;
constexpr uint64_t kHashBitsMask = UINT64_MAX << kHashShift;
int64_t hash_out[2] = {0, 0};
Expand Down Expand Up @@ -829,7 +849,7 @@ TEST(SerializationTest, TypeMetaRejectsOverConsumedDeclaredSize) {
EXPECT_EQ(parsed.error().code(), ErrorCode::InvalidData);
}

TEST(SerializationTest, TypeMetaHeaderUses52BitBodyHash) {
TEST(SerializationTest, TypeMetaHeaderUses52BitMetadataHash) {
std::vector<FieldInfo> fields;
fields.emplace_back(
"value", FieldType(static_cast<uint32_t>(TypeId::VARINT32), false));
Expand Down Expand Up @@ -869,6 +889,35 @@ TEST(SerializationTest, TypeMetaHeaderUses52BitBodyHash) {
parsed.value()->get_hash());
}

// Checks that TypeMeta::from_bytes rejects a header whose hash bits were
// computed over the body bytes alone: the header's hash bits are swapped for
// the body-only hash and parsing must fail with InvalidData and a message
// mentioning "metadata hash".
TEST(SerializationTest, TypeMetaRejectsBodyOnlyHeaderHash) {
  constexpr uint32_t kShift = 12;
  constexpr uint64_t kHashMask = UINT64_MAX << kShift;

  TypeMeta meta =
      TypeMeta::from_fields(static_cast<uint32_t>(TypeId::STRUCT), "", "S",
                            false, 1, std::vector<FieldInfo>{});
  auto encoded = meta.to_bytes();
  ASSERT_TRUE(encoded.ok())
      << "TypeMeta serialization failed: " << encoded.error().to_string();

  // Work on a private copy so the 8-byte header can be overwritten in place.
  std::vector<uint8_t> wire = encoded.value();
  ASSERT_GT(wire.size(), sizeof(uint64_t));
  uint64_t original_header = 0;
  std::memcpy(&original_header, wire.data(), sizeof(original_header));

  const uint8_t *body = wire.data() + sizeof(uint64_t);
  const size_t body_size = wire.size() - sizeof(uint64_t);
  const uint64_t body_only_bits =
      compute_body_only_type_meta_hash_bits_for_test(body, body_size);
  // The test is only meaningful if the two hashing schemes actually differ.
  ASSERT_NE(original_header & kHashMask, body_only_bits);

  // Keep the low header bits, replace only the hash bits.
  const uint64_t forged_header =
      body_only_bits | (original_header & ~kHashMask);
  std::memcpy(wire.data(), &forged_header, sizeof(forged_header));

  Buffer buffer(wire);
  auto parsed = TypeMeta::from_bytes(buffer, nullptr);
  ASSERT_FALSE(parsed.ok());
  EXPECT_EQ(parsed.error().code(), ErrorCode::InvalidData);
  const auto message = parsed.error().to_string();
  EXPECT_NE(message.find("metadata hash"), std::string::npos);
}

TEST(SerializationTest, TypeMetaNonStructHeaderUsesDenseKindCode) {
TypeMeta meta =
TypeMeta::from_fields(static_cast<uint32_t>(TypeId::ENUM), "", "E", false,
Expand Down Expand Up @@ -902,7 +951,7 @@ TEST(SerializationTest, TypeMetaRejectsNonStructReservedKindBits) {
ASSERT_NE(header & 0xff, 0xff);
header &= ~(UINT64_MAX << 12);
header |= compute_type_meta_hash_bits_for_test(
bytes.data() + sizeof(uint64_t), bytes.size() - sizeof(uint64_t));
bytes.data() + sizeof(uint64_t), bytes.size() - sizeof(uint64_t), header);
std::memcpy(bytes.data(), &header, sizeof(header));

Buffer buffer(bytes);
Expand Down
30 changes: 20 additions & 10 deletions cpp/fory/serialization/type_resolver.cc
Original file line number Diff line number Diff line change
Expand Up @@ -378,9 +378,15 @@ inline Result<uint32_t, Error> type_id_from_type_meta_kind(uint8_t kind_code) {
}

inline uint64_t compute_type_meta_hash_bits(const uint8_t *meta_bytes,
size_t meta_size) {
size_t meta_size,
uint64_t header_low_bits) {
std::vector<uint8_t> hash_input(meta_size + 2);
std::memcpy(hash_input.data(), meta_bytes, meta_size);
hash_input[meta_size] = static_cast<uint8_t>(header_low_bits);
hash_input[meta_size + 1] = static_cast<uint8_t>(header_low_bits >> 8);
int64_t hash_out[2] = {0, 0};
MurmurHash3_x64_128(meta_bytes, static_cast<int>(meta_size), 47, hash_out);
MurmurHash3_x64_128(hash_input.data(), static_cast<int>(hash_input.size()),
47, hash_out);
uint64_t shifted = static_cast<uint64_t>(hash_out[0]) << TYPE_META_HASH_SHIFT;
if (static_cast<int64_t>(shifted) < 0) {
shifted = ~shifted + 1;
Expand All @@ -390,8 +396,10 @@ inline uint64_t compute_type_meta_hash_bits(const uint8_t *meta_bytes,

// Computes the TypeMeta hash value (not shifted into header position) for a
// body of `meta_size` bytes at `meta_bytes`.
//
// The header low bits fed into the hash are derived from the size alone:
// min(META_SIZE_MASK, meta_size). No other low-bit flags are mixed in here.
// NOTE(review): this matches what TypeMeta::to_bytes passes in the visible
// code; confirm if flag bits are ever set in the low 12 bits.
inline int64_t compute_type_meta_hash(const uint8_t *meta_bytes,
                                      size_t meta_size) {
  const uint64_t header_low_bits =
      std::min<uint64_t>(META_SIZE_MASK, static_cast<uint64_t>(meta_size));
  const uint64_t hash_bits =
      compute_type_meta_hash_bits(meta_bytes, meta_size, header_low_bits);
  // Drop the low header bits so only the hash value remains.
  return static_cast<int64_t>(hash_bits >> TYPE_META_HASH_SHIFT);
}

Expand Down Expand Up @@ -434,18 +442,19 @@ read_type_meta_size(Buffer &buffer, uint64_t header, size_t *header_size) {
// Validates the hash bits stored in a TypeMeta `header` against the bytes that
// were just parsed from `buffer`.
//
// `body_start` and `meta_size` delimit the TypeMeta body inside `buffer`. The
// expected hash is recomputed from that body together with the header's
// non-hash bits (header & ~TYPE_META_HASH_BITS_MASK) and compared with the
// hash bits carried by `header`.
//
// Returns an InvalidData error when the body range extends past the current
// reader index or the buffer size, or when the hashes differ; otherwise
// returns success.
inline Result<void, Error> validate_type_meta_hash(Buffer &buffer,
                                                   uint32_t body_start,
                                                   uint32_t meta_size,
                                                   uint64_t header) {
  // Widen before adding so body_start + meta_size cannot wrap in 32 bits.
  const uint64_t body_end = static_cast<uint64_t>(body_start) + meta_size;
  if (FORY_PREDICT_FALSE(body_end > buffer.reader_index() ||
                         body_end > buffer.size())) {
    return Unexpected(
        Error::invalid_data("TypeMeta body range is not readable"));
  }
  const uint64_t computed_hash_bits = compute_type_meta_hash_bits(
      buffer.data() + body_start, static_cast<size_t>(meta_size),
      header & ~TYPE_META_HASH_BITS_MASK);
  if (FORY_PREDICT_FALSE((computed_hash_bits >> TYPE_META_HASH_SHIFT) !=
                         (header >> TYPE_META_HASH_SHIFT))) {
    return Unexpected(Error::invalid_data("TypeMeta metadata hash mismatch"));
  }
  return Result<void, Error>();
}
Expand Down Expand Up @@ -574,7 +583,8 @@ Result<std::vector<uint8_t>, Error> TypeMeta::to_bytes() const {
uint64_t meta_size = layer_size;
uint64_t header = std::min(META_SIZE_MASK, meta_size);

header |= compute_type_meta_hash_bits(layer_buffer.data(), layer_size);
header |=
compute_type_meta_hash_bits(layer_buffer.data(), layer_size, header);

result_buffer.write_bytes(reinterpret_cast<const uint8_t *>(&header),
sizeof(header));
Expand Down Expand Up @@ -700,7 +710,7 @@ TypeMeta::from_bytes(Buffer &buffer, const TypeMeta *local_type_info) {
"TypeMeta parser did not consume declared meta size"));
}
FORY_RETURN_IF_ERROR(
validate_type_meta_hash(buffer, body_start, meta_size, meta_hash));
validate_type_meta_hash(buffer, body_start, meta_size, header_bits));

auto meta = std::make_unique<TypeMeta>();
meta->hash = meta_hash;
Expand Down Expand Up @@ -811,7 +821,7 @@ TypeMeta::from_bytes_with_header(Buffer &buffer, int64_t header) {
"TypeMeta parser did not consume declared meta size"));
}
FORY_RETURN_IF_ERROR(
validate_type_meta_hash(buffer, start_pos, meta_size, meta_hash));
validate_type_meta_hash(buffer, start_pos, meta_size, header_bits));

auto meta = std::make_unique<TypeMeta>();
meta->hash = meta_hash;
Expand Down
2 changes: 1 addition & 1 deletion csharp/src/Fory/ReadContext.cs
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,7 @@ internal TypeMeta ReadTypeMeta()
if (TryGetCachedReadTypeMeta(header, out TypeMeta cachedTypeMeta))
{
// Header-cache hits intentionally skip without rehashing. Entries reach this cache only
// after a successful TypeMeta parse and 52-bit metadata-hash validation. The current body
// size still comes from the current header bytes, not from the cached TypeMeta.
TypeMeta.SkipBody(Reader, header);
StoreReadTypeMeta(cachedTypeMeta, index);
Expand Down
50 changes: 40 additions & 10 deletions csharp/src/Fory/TypeMeta.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
// specific language governing permissions and limitations
// under the License.

using System.Buffers;

namespace Apache.Fory;

internal static class TypeMetaConstants
Expand Down Expand Up @@ -467,9 +469,8 @@ public byte[] Encode()
}

byte[] body = EncodeBody();
ulong header = ComputeHeaderHashBits(body);
uint bodySize = (uint)Math.Min(body.Length, (int)TypeMetaConstants.TypeMetaSizeMask);
header |= bodySize;
ulong headerLowBits = ComputeHeaderLowBits(body.Length, compressed: false);
ulong header = ComputeHeaderHashBits(body, headerLowBits) | headerLowBits;
ByteWriter writer = new(body.Length + 16);
writer.WriteUInt64(header);
if (body.Length >= (int)TypeMetaConstants.TypeMetaSizeMask)
Expand Down Expand Up @@ -609,18 +610,47 @@ internal static void SkipBody(ByteReader reader, ulong header)
reader.Skip(ReadBodySize(reader, header));
}

private static ulong ComputeHeaderHashBits(ReadOnlySpan<byte> body)
/// <summary>
/// Builds the non-hash low bits of a TypeMeta header: the body length capped at
/// <see cref="TypeMetaConstants.TypeMetaSizeMask"/>, OR-ed with
/// <see cref="TypeMetaConstants.TypeMetaCompressedFlag"/> when
/// <paramref name="compressed"/> is true.
/// </summary>
/// <param name="bodyLength">Length in bytes of the encoded TypeMeta body.</param>
/// <param name="compressed">Whether the compressed flag should be set.</param>
/// <returns>The header low bits, without any hash bits.</returns>
private static ulong ComputeHeaderLowBits(int bodyLength, bool compressed)
{
    int cappedSize = Math.Min(bodyLength, (int)TypeMetaConstants.TypeMetaSizeMask);
    ulong sizeBits = (ulong)cappedSize;
    return compressed
        ? sizeBits | TypeMetaConstants.TypeMetaCompressedFlag
        : sizeBits;
}

/// <summary>
/// Computes the hash bits of a TypeMeta header from the encoded body and the
/// header's low (non-hash) bits.
/// </summary>
/// <remarks>
/// The hash input is the body followed by the two least-significant bytes of
/// <paramref name="headerLowBits"/>, low byte first. The first value returned by
/// <c>MurmurHash3.X64_128</c> is shifted left by
/// <see cref="TypeMetaConstants.TypeMetaHashShift"/>, replaced by its absolute
/// value when interpreted as a signed 64-bit integer (left unchanged for
/// <see cref="long.MinValue"/>, where <see cref="Math.Abs(long)"/> would throw),
/// and masked with <see cref="TypeMetaConstants.TypeMetaHashMask"/>.
/// </remarks>
/// <param name="body">Encoded TypeMeta body bytes.</param>
/// <param name="headerLowBits">Header bits outside the hash field.</param>
/// <returns>The hash, already positioned in the header's hash field.</returns>
private static ulong ComputeHeaderHashBits(ReadOnlySpan<byte> body, ulong headerLowBits)
{
    int hashInputLength = body.Length + sizeof(ushort);
    byte[]? rented = null;
    // Small inputs are assembled on the stack; larger ones use a pooled array
    // that is handed back in the finally block.
    Span<byte> hashInput = hashInputLength <= 1024
        ? stackalloc byte[hashInputLength]
        : (rented = ArrayPool<byte>.Shared.Rent(hashInputLength)).AsSpan(0, hashInputLength);
    try
    {
        body.CopyTo(hashInput);
        hashInput[body.Length] = unchecked((byte)headerLowBits);
        hashInput[body.Length + 1] = unchecked((byte)(headerLowBits >> 8));
        (ulong bodyHash, _) = MurmurHash3.X64_128(hashInput, TypeMetaConstants.TypeMetaHashSeed);
        ulong shifted = bodyHash << TypeMetaConstants.TypeMetaHashShift;
        long signed = unchecked((long)shifted);
        long absSigned = signed == long.MinValue ? signed : Math.Abs(signed);
        return unchecked((ulong)absSigned) & TypeMetaConstants.TypeMetaHashMask;
    }
    finally
    {
        if (rented is not null)
        {
            ArrayPool<byte>.Shared.Return(rented);
        }
    }
}

private static void ValidateParsedTypeMetaHash(ulong header, ReadOnlySpan<byte> body)
{
ulong expectedHeaderHash = ComputeHeaderHashBits(body);
ulong expectedHeaderHash = ComputeHeaderHashBits(body, header & ~TypeMetaConstants.TypeMetaHashMask);
ulong actualHeaderHash = header & TypeMetaConstants.TypeMetaHashMask;
if (actualHeaderHash != expectedHeaderHash)
{
Expand Down
49 changes: 49 additions & 0 deletions csharp/tests/Fory.Tests/ForyRuntimeTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
// under the License.

using System.Buffers;
using System.Buffers.Binary;
using System.Collections.Concurrent;
using System.Collections.Immutable;
using System.Threading.Tasks;
Expand Down Expand Up @@ -1735,6 +1736,33 @@ public void CompatibleTypeMetaCacheMissValidatesBodyHashBeforeCaching()
Assert.Contains("TypeMeta metadata hash mismatch", exception.Message, StringComparison.Ordinal);
}

/// <summary>
/// Encodes a TypeMeta, replaces the header's hash bits with a hash computed over
/// the body bytes only, and checks that decoding fails with a metadata hash
/// mismatch.
/// </summary>
[Fact]
public void TypeMetaHeaderHashIncludesLowHeaderBits()
{
    const int hashShift = 12;
    ulong hashBitsMask = ulong.MaxValue << hashShift;

    TypeMeta typeMeta = new(
        (uint)TypeId.CompatibleStruct,
        201,
        MetaString.Empty('.', '_'),
        MetaString.Empty('$', '_'),
        registerByName: false,
        [new TypeMetaFieldInfo(1, "value", new TypeMetaFieldType((uint)TypeId.String, true))]);

    byte[] wire = typeMeta.Encode();
    ulong originalHeader = BinaryPrimitives.ReadUInt64LittleEndian(wire);
    int bodyStart = TypeMetaBodyOffset(wire, originalHeader);
    ulong bodyOnlyBits = BodyOnlyTypeMetaHashBits(wire.AsSpan(bodyStart));

    // The scenario only makes sense if the two hashing schemes disagree.
    Assert.NotEqual(originalHeader & hashBitsMask, bodyOnlyBits);

    // Keep the low header bits and swap in the body-only hash bits.
    ulong forgedHeader = bodyOnlyBits | (originalHeader & ~hashBitsMask);
    byte[] malformed = (byte[])wire.Clone();
    BinaryPrimitives.WriteUInt64LittleEndian(malformed, forgedHeader);

    InvalidDataException exception =
        Assert.Throws<InvalidDataException>(() => TypeMeta.Decode(malformed));
    Assert.Contains("TypeMeta metadata hash mismatch", exception.Message, StringComparison.Ordinal);
}

[Fact]
public void TypeMetaAssignFieldIdsPrefersIdAndFallsBackToName()
{
Expand Down Expand Up @@ -1889,6 +1917,27 @@ private static byte[] CorruptCompatibleTypeMetaBody(byte[] payload)
return malformed;
}

/// <summary>
/// Returns the reader cursor position after the TypeMeta header in
/// <paramref name="encoded"/>: the 64-bit header word is consumed, and when the
/// low byte of <paramref name="header"/> is 0xff one additional var-uint32 is
/// consumed as well.
/// </summary>
private static int TypeMetaBodyOffset(byte[] encoded, ulong header)
{
    const ulong lowByteMask = 0xff;

    ByteReader cursor = new(encoded);
    cursor.ReadUInt64();

    bool hasTrailingVarUInt = (header & lowByteMask) == lowByteMask;
    if (hasTrailingVarUInt)
    {
        cursor.ReadVarUInt32();
    }

    return cursor.Cursor;
}

/// <summary>
/// Computes header hash bits from the body bytes alone (no header bits mixed
/// in): MurmurHash3 x64-128 with seed 47, first result shifted left by 12,
/// absolute value taken as a signed 64-bit integer (unchanged for
/// <see cref="long.MinValue"/>), then the low 12 bits cleared.
/// </summary>
private static ulong BodyOnlyTypeMetaHashBits(ReadOnlySpan<byte> body)
{
    const int hashShift = 12;
    const ulong hashMask = ulong.MaxValue << hashShift;

    (ulong firstWord, _) = MurmurHash3.X64_128(body, 47);
    long magnitude = unchecked((long)(firstWord << hashShift));
    if (magnitude != long.MinValue)
    {
        // Math.Abs throws for long.MinValue, so that value is passed through as is.
        magnitude = Math.Abs(magnitude);
    }

    return unchecked((ulong)magnitude) & hashMask;
}

private static (int TypeMetaStart, int TypeMetaEnd, TypeMeta TypeMeta) ReadCompatibleTypeMetaRange(byte[] payload)
{
ByteReader reader = new(payload);
Expand Down
6 changes: 5 additions & 1 deletion dart/packages/fory/lib/src/meta/type_meta.dart
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ final class WireTypeMeta {
final class TypeHeader {
static const int _compressMetaFlag = 1 << 8;
static const int _reservedMetaFlags = 0x0e00;
static const int _headerLowBitsMask = 0x0fff;
static const int _hashLow32Mask = 0xfffff000;

final Int64 value;
Expand Down Expand Up @@ -89,7 +90,10 @@ final class TypeHeader {

@pragma('vm:prefer-inline')
void validateBodyHash(Uint8List body) {
final expected = typeDefHeader(body);
final expected = typeDefHeader(
body,
headerLowBits: value.low32 & _headerLowBitsMask,
);
if (value.high32Unsigned != expected.high32Unsigned ||
(value.low32 & _hashLow32Mask) != (expected.low32 & _hashLow32Mask)) {
throw StateError('Invalid TypeDef metadata hash.');
Expand Down
4 changes: 2 additions & 2 deletions dart/packages/fory/lib/src/resolver/type_resolver.dart
Original file line number Diff line number Diff line change
Expand Up @@ -1070,15 +1070,15 @@ final class TypeResolver {
final expectedTypeDef = expectedType?.typeDef;
if (expectedTypeDef != null && expectedTypeDef.header == header.value) {
// Header-cache hits intentionally skip without rehashing. Entries reach this cache only
// after a successful TypeDef parse and 52-bit metadata-hash validation.
header.skipRemaining(buffer);
sharedTypes.add(expectedType!);
return wireTypeMetaForResolved(expectedType);
}
final cached = _parsedTypeMetaCache.lookup(header);
if (cached != null) {
// Header-cache hits intentionally skip without rehashing. Entries reach this cache only
// after a successful TypeDef parse and 52-bit metadata-hash validation.
header.skipRemaining(buffer);
sharedTypes.add(cached);
return wireTypeMetaForResolved(cached);
Expand Down
Loading
Loading