diff --git a/cpp/fory/serialization/context.cc b/cpp/fory/serialization/context.cc index ef657fcf72..6eec0267d9 100644 --- a/cpp/fory/serialization/context.cc +++ b/cpp/fory/serialization/context.cc @@ -506,8 +506,8 @@ Result ReadContext::read_type_meta() { // Check if we already parsed this type meta (cache lookup by header) if (has_last_meta_header_ && meta_header == last_meta_header_) { // Header-cache hits intentionally skip without rehashing. Entries reach - // this cache only after a successful TypeMeta parse and 52-bit body-hash - // validation. + // this cache only after a successful TypeMeta parse and 52-bit + // metadata-hash validation. const TypeInfo *cached = last_meta_type_info_; reading_type_infos_.push_back(cached); FORY_RETURN_NOT_OK( @@ -518,8 +518,8 @@ Result ReadContext::read_type_meta() { auto *cache_entry = parsed_type_infos_.find(meta_header); if (cache_entry != nullptr) { // Header-cache hits intentionally skip without rehashing. Entries reach - // this cache only after a successful TypeMeta parse and 52-bit body-hash - // validation. + // this cache only after a successful TypeMeta parse and 52-bit + // metadata-hash validation. const TypeInfo *cached = cache_entry->second; reading_type_infos_.push_back(cached); has_last_meta_header_ = true; diff --git a/cpp/fory/serialization/serialization_test.cc b/cpp/fory/serialization/serialization_test.cc index 226afe42bf..27ab634fa9 100644 --- a/cpp/fory/serialization/serialization_test.cc +++ b/cpp/fory/serialization/serialization_test.cc @@ -84,7 +84,27 @@ namespace test { namespace { uint64_t compute_type_meta_hash_bits_for_test(const uint8_t *meta_bytes, - size_t meta_size) { + size_t meta_size, + uint64_t header_low_bits) { + constexpr uint32_t kHashShift = 12; + constexpr uint64_t kHashBitsMask = UINT64_MAX << kHashShift; + std::vector hash_input(meta_size + 2); + std::memcpy(hash_input.data(), meta_bytes, meta_size); + hash_input[meta_size] = static_cast(header_low_bits); + hash_input[meta_size + 1] = static_cast(header_low_bits >> 8); + int64_t hash_out[2] = {0, 0}; + MurmurHash3_x64_128(hash_input.data(), static_cast(hash_input.size()), + 47, hash_out); + uint64_t shifted = static_cast(hash_out[0]) << kHashShift; + if (static_cast(shifted) < 0) { + shifted = ~shifted + 1; + } + return shifted & kHashBitsMask; +} + +uint64_t +compute_body_only_type_meta_hash_bits_for_test(const uint8_t *meta_bytes, + size_t meta_size) { constexpr uint32_t kHashShift = 12; constexpr uint64_t kHashBitsMask = UINT64_MAX << kHashShift; int64_t hash_out[2] = {0, 0}; @@ -829,7 +849,7 @@ TEST(SerializationTest, TypeMetaRejectsOverConsumedDeclaredSize) { EXPECT_EQ(parsed.error().code(), ErrorCode::InvalidData); } -TEST(SerializationTest, TypeMetaHeaderUses52BitBodyHash) { +TEST(SerializationTest, TypeMetaHeaderUses52BitMetadataHash) { std::vector fields; fields.emplace_back( "value", FieldType(static_cast(TypeId::VARINT32), false)); @@ -869,6 +889,35 @@ TEST(SerializationTest, TypeMetaHeaderUses52BitBodyHash) { parsed.value()->get_hash()); } +TEST(SerializationTest, TypeMetaRejectsBodyOnlyHeaderHash) { + TypeMeta meta = + TypeMeta::from_fields(static_cast(TypeId::STRUCT), "", "S", + false, 1, std::vector{}); + auto bytes_result = meta.to_bytes(); + ASSERT_TRUE(bytes_result.ok()) + << "TypeMeta serialization failed: " << bytes_result.error().to_string(); + + std::vector bytes = bytes_result.value(); + ASSERT_GT(bytes.size(), sizeof(uint64_t)); + uint64_t header = 0; + std::memcpy(&header, bytes.data(), sizeof(header)); + + constexpr uint32_t kHashShift = 12; + constexpr uint64_t kHashBitsMask = UINT64_MAX << kHashShift; + uint64_t body_only_hash = compute_body_only_type_meta_hash_bits_for_test( + bytes.data() + sizeof(uint64_t), bytes.size() - sizeof(uint64_t)); + ASSERT_NE(header & kHashBitsMask, body_only_hash); + header = body_only_hash | (header & ~kHashBitsMask); + std::memcpy(bytes.data(), &header, sizeof(header)); + + Buffer buffer(bytes); + auto parsed = TypeMeta::from_bytes(buffer, nullptr); + ASSERT_FALSE(parsed.ok()); + EXPECT_EQ(parsed.error().code(), ErrorCode::InvalidData); + EXPECT_NE(parsed.error().to_string().find("metadata hash"), + std::string::npos); +} + TEST(SerializationTest, TypeMetaNonStructHeaderUsesDenseKindCode) { TypeMeta meta = TypeMeta::from_fields(static_cast(TypeId::ENUM), "", "E", false, @@ -902,7 +951,7 @@ TEST(SerializationTest, TypeMetaRejectsNonStructReservedKindBits) { ASSERT_NE(header & 0xff, 0xff); header &= ~(UINT64_MAX << 12); header |= compute_type_meta_hash_bits_for_test( - bytes.data() + sizeof(uint64_t), bytes.size() - sizeof(uint64_t)); + bytes.data() + sizeof(uint64_t), bytes.size() - sizeof(uint64_t), header); std::memcpy(bytes.data(), &header, sizeof(header)); Buffer buffer(bytes); diff --git a/cpp/fory/serialization/type_resolver.cc b/cpp/fory/serialization/type_resolver.cc index 039e7e8264..31252670d6 100644 --- a/cpp/fory/serialization/type_resolver.cc +++ b/cpp/fory/serialization/type_resolver.cc @@ -378,9 +378,15 @@ inline Result type_id_from_type_meta_kind(uint8_t kind_code) { } inline uint64_t compute_type_meta_hash_bits(const uint8_t *meta_bytes, - size_t meta_size) { + size_t meta_size, + uint64_t header_low_bits) { + std::vector hash_input(meta_size + 2); + std::memcpy(hash_input.data(), meta_bytes, meta_size); + hash_input[meta_size] = static_cast(header_low_bits); + hash_input[meta_size + 1] = static_cast(header_low_bits >> 8); int64_t hash_out[2] = {0, 0}; - MurmurHash3_x64_128(meta_bytes, static_cast(meta_size), 47, hash_out); + MurmurHash3_x64_128(hash_input.data(), static_cast(hash_input.size()), + 47, hash_out); uint64_t shifted = static_cast(hash_out[0]) << TYPE_META_HASH_SHIFT; if (static_cast(shifted) < 0) { shifted = ~shifted + 1; @@ -390,8 +396,10 @@ inline uint64_t compute_type_meta_hash_bits(const uint8_t *meta_bytes, inline int64_t compute_type_meta_hash(const uint8_t *meta_bytes, size_t meta_size) { + uint64_t header_low_bits = + std::min(META_SIZE_MASK, static_cast(meta_size)); return static_cast( - compute_type_meta_hash_bits(meta_bytes, meta_size) >> + compute_type_meta_hash_bits(meta_bytes, meta_size, header_low_bits) >> TYPE_META_HASH_SHIFT); } @@ -434,7 +442,7 @@ read_type_meta_size(Buffer &buffer, uint64_t header, size_t *header_size) { inline Result validate_type_meta_hash(Buffer &buffer, uint32_t body_start, uint32_t meta_size, - int64_t header_hash) { + uint64_t header) { uint64_t body_end = static_cast(body_start) + meta_size; if (FORY_PREDICT_FALSE(body_end > buffer.reader_index() || body_end > buffer.size())) { @@ -442,10 +450,11 @@ inline Result validate_type_meta_hash(Buffer &buffer, Error::invalid_data("TypeMeta body range is not readable")); } uint64_t computed_hash_bits = compute_type_meta_hash_bits( - buffer.data() + body_start, static_cast(meta_size)); + buffer.data() + body_start, static_cast(meta_size), + header & ~TYPE_META_HASH_BITS_MASK); if (FORY_PREDICT_FALSE((computed_hash_bits >> TYPE_META_HASH_SHIFT) != - static_cast(header_hash))) { - return Unexpected(Error::invalid_data("TypeMeta body hash mismatch")); + (header >> TYPE_META_HASH_SHIFT))) { + return Unexpected(Error::invalid_data("TypeMeta metadata hash mismatch")); } return Result(); } @@ -574,7 +583,8 @@ Result, Error> TypeMeta::to_bytes() const { uint64_t meta_size = layer_size; uint64_t header = std::min(META_SIZE_MASK, meta_size); - header |= compute_type_meta_hash_bits(layer_buffer.data(), layer_size); + header |= + compute_type_meta_hash_bits(layer_buffer.data(), layer_size, header); result_buffer.write_bytes(reinterpret_cast(&header), sizeof(header)); @@ -700,7 +710,7 @@ TypeMeta::from_bytes(Buffer &buffer, const TypeMeta *local_type_info) { "TypeMeta parser did not consume declared meta size")); } FORY_RETURN_IF_ERROR( - validate_type_meta_hash(buffer, body_start, meta_size, meta_hash)); + validate_type_meta_hash(buffer, body_start, meta_size, header_bits)); auto meta = std::make_unique(); meta->hash = meta_hash; @@ -811,7 +821,7 @@ TypeMeta::from_bytes_with_header(Buffer &buffer, int64_t header) { "TypeMeta parser did not consume declared meta size")); } FORY_RETURN_IF_ERROR( - validate_type_meta_hash(buffer, start_pos, meta_size, meta_hash)); + validate_type_meta_hash(buffer, start_pos, meta_size, header_bits)); auto meta = std::make_unique(); meta->hash = meta_hash; diff --git a/csharp/src/Fory/ReadContext.cs b/csharp/src/Fory/ReadContext.cs index 6444500c40..2192bb55cf 100644 --- a/csharp/src/Fory/ReadContext.cs +++ b/csharp/src/Fory/ReadContext.cs @@ -205,7 +205,7 @@ internal TypeMeta ReadTypeMeta() if (TryGetCachedReadTypeMeta(header, out TypeMeta cachedTypeMeta)) { // Header-cache hits intentionally skip without rehashing. Entries reach this cache only - // after a successful TypeMeta parse and 52-bit body-hash validation. The current body + // after a successful TypeMeta parse and 52-bit metadata-hash validation. The current body // size still comes from the current header bytes, not from the cached TypeMeta. TypeMeta.SkipBody(Reader, header); StoreReadTypeMeta(cachedTypeMeta, index); diff --git a/csharp/src/Fory/TypeMeta.cs b/csharp/src/Fory/TypeMeta.cs index 4312a5af86..216d34dfcd 100644 --- a/csharp/src/Fory/TypeMeta.cs +++ b/csharp/src/Fory/TypeMeta.cs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +using System.Buffers; + namespace Apache.Fory; internal static class TypeMetaConstants @@ -467,9 +469,8 @@ public byte[] Encode() } byte[] body = EncodeBody(); - ulong header = ComputeHeaderHashBits(body); - uint bodySize = (uint)Math.Min(body.Length, (int)TypeMetaConstants.TypeMetaSizeMask); - header |= bodySize; + ulong headerLowBits = ComputeHeaderLowBits(body.Length, compressed: false); + ulong header = ComputeHeaderHashBits(body, headerLowBits) | headerLowBits; ByteWriter writer = new(body.Length + 16); writer.WriteUInt64(header); if (body.Length >= (int)TypeMetaConstants.TypeMetaSizeMask) @@ -609,18 +610,47 @@ internal static void SkipBody(ByteReader reader, ulong header) reader.Skip(ReadBodySize(reader, header)); } - private static ulong ComputeHeaderHashBits(ReadOnlySpan body) + private static ulong ComputeHeaderLowBits(int bodyLength, bool compressed) + { + ulong headerLowBits = (ulong)Math.Min(bodyLength, (int)TypeMetaConstants.TypeMetaSizeMask); + if (compressed) + { + headerLowBits |= TypeMetaConstants.TypeMetaCompressedFlag; + } + + return headerLowBits; + } + + private static ulong ComputeHeaderHashBits(ReadOnlySpan body, ulong headerLowBits) { - (ulong bodyHash, _) = MurmurHash3.X64_128(body, TypeMetaConstants.TypeMetaHashSeed); - ulong shifted = bodyHash << TypeMetaConstants.TypeMetaHashShift; - long signed = unchecked((long)shifted); - long absSigned = signed == long.MinValue ? signed : Math.Abs(signed); - return unchecked((ulong)absSigned) & TypeMetaConstants.TypeMetaHashMask; + int hashInputLength = body.Length + sizeof(ushort); + byte[]? rented = null; + Span hashInput = hashInputLength <= 1024 + ? stackalloc byte[hashInputLength] + : (rented = ArrayPool.Shared.Rent(hashInputLength)).AsSpan(0, hashInputLength); + try + { + body.CopyTo(hashInput); + hashInput[body.Length] = unchecked((byte)headerLowBits); + hashInput[body.Length + 1] = unchecked((byte)(headerLowBits >> 8)); + (ulong bodyHash, _) = MurmurHash3.X64_128(hashInput, TypeMetaConstants.TypeMetaHashSeed); + ulong shifted = bodyHash << TypeMetaConstants.TypeMetaHashShift; + long signed = unchecked((long)shifted); + long absSigned = signed == long.MinValue ? signed : Math.Abs(signed); + return unchecked((ulong)absSigned) & TypeMetaConstants.TypeMetaHashMask; + } + finally + { + if (rented is not null) + { + ArrayPool.Shared.Return(rented); + } + } } private static void ValidateParsedTypeMetaHash(ulong header, ReadOnlySpan body) { - ulong expectedHeaderHash = ComputeHeaderHashBits(body); + ulong expectedHeaderHash = ComputeHeaderHashBits(body, header & ~TypeMetaConstants.TypeMetaHashMask); ulong actualHeaderHash = header & TypeMetaConstants.TypeMetaHashMask; if (actualHeaderHash != expectedHeaderHash) { diff --git a/csharp/tests/Fory.Tests/ForyRuntimeTests.cs b/csharp/tests/Fory.Tests/ForyRuntimeTests.cs index 7e1b0bc107..511713cf37 100644 --- a/csharp/tests/Fory.Tests/ForyRuntimeTests.cs +++ b/csharp/tests/Fory.Tests/ForyRuntimeTests.cs @@ -16,6 +16,7 @@ // under the License. using System.Buffers; +using System.Buffers.Binary; using System.Collections.Concurrent; using System.Collections.Immutable; using System.Threading.Tasks; @@ -1735,6 +1736,33 @@ public void CompatibleTypeMetaCacheMissValidatesBodyHashBeforeCaching() Assert.Contains("TypeMeta metadata hash mismatch", exception.Message, StringComparison.Ordinal); } + [Fact] + public void TypeMetaHeaderHashIncludesLowHeaderBits() + { + TypeMeta typeMeta = new( + (uint)TypeId.CompatibleStruct, + 201, + MetaString.Empty('.', '_'), + MetaString.Empty('$', '_'), + registerByName: false, + [new TypeMetaFieldInfo(1, "value", new TypeMetaFieldType((uint)TypeId.String, true))]); + byte[] encoded = typeMeta.Encode(); + ulong header = BinaryPrimitives.ReadUInt64LittleEndian(encoded); + int bodyOffset = TypeMetaBodyOffset(encoded, header); + ulong hashMask = ulong.MaxValue << 12; + ulong bodyOnlyHash = BodyOnlyTypeMetaHashBits(encoded.AsSpan(bodyOffset)); + Assert.NotEqual(header & hashMask, bodyOnlyHash); + + byte[] malformed = (byte[])encoded.Clone(); + BinaryPrimitives.WriteUInt64LittleEndian( + malformed, + bodyOnlyHash | (header & ~hashMask)); + + InvalidDataException exception = + Assert.Throws(() => TypeMeta.Decode(malformed)); + Assert.Contains("TypeMeta metadata hash mismatch", exception.Message, StringComparison.Ordinal); + } + [Fact] public void TypeMetaAssignFieldIdsPrefersIdAndFallsBackToName() { @@ -1889,6 +1917,27 @@ private static byte[] CorruptCompatibleTypeMetaBody(byte[] payload) return malformed; } + private static int TypeMetaBodyOffset(byte[] encoded, ulong header) + { + ByteReader reader = new(encoded); + _ = reader.ReadUInt64(); + if ((header & 0xff) == 0xff) + { + _ = reader.ReadVarUInt32(); + } + + return reader.Cursor; + } + + private static ulong BodyOnlyTypeMetaHashBits(ReadOnlySpan body) + { + (ulong bodyHash, _) = MurmurHash3.X64_128(body, 47); + ulong shifted = bodyHash << 12; + long signed = unchecked((long)shifted); + long absSigned = signed == long.MinValue ? signed : Math.Abs(signed); + return unchecked((ulong)absSigned) & (ulong.MaxValue << 12); + } + private static (int TypeMetaStart, int TypeMetaEnd, TypeMeta TypeMeta) ReadCompatibleTypeMetaRange(byte[] payload) { ByteReader reader = new(payload); diff --git a/dart/packages/fory/lib/src/meta/type_meta.dart b/dart/packages/fory/lib/src/meta/type_meta.dart index 8b32f199fa..7d77e17252 100644 --- a/dart/packages/fory/lib/src/meta/type_meta.dart +++ b/dart/packages/fory/lib/src/meta/type_meta.dart @@ -57,6 +57,7 @@ final class WireTypeMeta { final class TypeHeader { static const int _compressMetaFlag = 1 << 8; static const int _reservedMetaFlags = 0x0e00; + static const int _headerLowBitsMask = 0x0fff; static const int _hashLow32Mask = 0xfffff000; final Int64 value; @@ -89,7 +90,10 @@ final class TypeHeader { @pragma('vm:prefer-inline') void validateBodyHash(Uint8List body) { - final expected = typeDefHeader(body); + final expected = typeDefHeader( + body, + headerLowBits: value.low32 & _headerLowBitsMask, + ); if (value.high32Unsigned != expected.high32Unsigned || (value.low32 & _hashLow32Mask) != (expected.low32 & _hashLow32Mask)) { throw StateError('Invalid TypeDef metadata hash.'); diff --git a/dart/packages/fory/lib/src/resolver/type_resolver.dart b/dart/packages/fory/lib/src/resolver/type_resolver.dart index b9d095d39f..adecdca05c 100644 --- a/dart/packages/fory/lib/src/resolver/type_resolver.dart +++ b/dart/packages/fory/lib/src/resolver/type_resolver.dart @@ -1070,7 +1070,7 @@ final class TypeResolver { final expectedTypeDef = expectedType?.typeDef; if (expectedTypeDef != null && expectedTypeDef.header == header.value) { // Header-cache hits intentionally skip without rehashing. Entries reach this cache only - // after a successful TypeDef parse and 52-bit body-hash validation. + // after a successful TypeDef parse and 52-bit metadata-hash validation. header.skipRemaining(buffer); sharedTypes.add(expectedType!); return wireTypeMetaForResolved(expectedType); @@ -1078,7 +1078,7 @@ final class TypeResolver { final cached = _parsedTypeMetaCache.lookup(header); if (cached != null) { // Header-cache hits intentionally skip without rehashing. Entries reach this cache only - // after a successful TypeDef parse and 52-bit body-hash validation. + // after a successful TypeDef parse and 52-bit metadata-hash validation. header.skipRemaining(buffer); sharedTypes.add(cached); return wireTypeMetaForResolved(cached); diff --git a/dart/packages/fory/lib/src/util/hash_util.dart b/dart/packages/fory/lib/src/util/hash_util.dart index 834f6d620d..e80874c493 100644 --- a/dart/packages/fory/lib/src/util/hash_util.dart +++ b/dart/packages/fory/lib/src/util/hash_util.dart @@ -154,18 +154,23 @@ Int64 metaStringHash(List bytes, {int encoding = 0}) { Int64 typeDefHeader( List bytes, { bool compressed = false, + int? headerLowBits, }) { - final hash = _int64FromUint64( - _murmurHash3X64_128Bits(bytes).$1 << _typeDefHashShift, - ); - var header = _absSigned64Bits(hash); - if (compressed) { - header = header | _typeDefCompressMetaFlag; - } - header = header | + var lowBits = headerLowBits ?? (bytes.length > _typeDefMetaSizeMask ? _typeDefMetaSizeMask : bytes.length); + if (compressed) { + lowBits |= _typeDefCompressMetaFlag; + } + final hashInput = List.of(bytes, growable: true) + ..add(lowBits & 0xff) + ..add((lowBits >> 8) & 0xff); + final hash = _int64FromUint64( + _murmurHash3X64_128Bits(hashInput).$1 << _typeDefHashShift, + ); + var header = _absSigned64Bits(hash); + header = header | lowBits; return _int64FromUint64(header); } diff --git a/dart/packages/fory/test/xlang_protocol_test.dart b/dart/packages/fory/test/xlang_protocol_test.dart index c8bb77f1a0..355ef4252d 100644 --- a/dart/packages/fory/test/xlang_protocol_test.dart +++ b/dart/packages/fory/test/xlang_protocol_test.dart @@ -172,6 +172,18 @@ void main() { ), ), ); + + final headerWithDifferentLowBits = TypeHeader(header.value ^ 1); + expect( + () => headerWithDifferentLowBits.validateBodyHash(body), + throwsA( + isA().having( + (error) => error.toString(), + 'message', + contains('metadata hash'), + ), + ), + ); }); }); } diff --git a/docs/specification/java_serialization_spec.md b/docs/specification/java_serialization_spec.md index c52695f1cc..094dcb1351 100644 --- a/docs/specification/java_serialization_spec.md +++ b/docs/specification/java_serialization_spec.md @@ -206,9 +206,15 @@ Header layout (lower bits on the right): ``` - size: lower 8 bits. If size equals the mask (0xFF), write extra size as varuint32 and add it. -- compress: bit 8, set when payload is compressed. +- compress: bit 8, set when class meta bytes are compressed. - reserved: bits 9-11 are reserved for future use and must be zero. -- hash: 52-bit hash of the payload. +- hash: 52 stored hash bits derived from MurmurHash3 x64_128 seed 47 over + `class meta bytes || header_low12_le`. `header_low12_le` is two little-endian bytes containing + the low 12 header bits (size, compress, and reserved bits); the upper four bits of the second + byte are zero. Take lane 0 of the 128-bit MurmurHash3 result as a signed int64, left-shift it by + 12 with two's-complement 64-bit wraparound, apply signed absolute value (leaving `INT64_MIN` + unchanged), then mask with `0xfffffffffffff000`. The final header is the masked hash bits OR-ed + with the low 12 header bits. ### Class meta bytes diff --git a/docs/specification/xlang_serialization_spec.md b/docs/specification/xlang_serialization_spec.md index 851791edbc..516622ae64 100644 --- a/docs/specification/xlang_serialization_spec.md +++ b/docs/specification/xlang_serialization_spec.md @@ -557,7 +557,13 @@ The 8-byte header is a little-endian uint64: Current xlang writers MUST leave this bit unset and current xlang readers MUST treat a set bit as unsupported. - Bits 9-11: reserved for future extension (must be zero). -- High 52 bits: hash of the TypeDef body. +- High 52 bits: stored hash bits derived from MurmurHash3 x64_128 seed 47 over + `TypeDef body || header_low12_le`. `header_low12_le` is two little-endian bytes containing the low + 12 header bits (size, compress, and reserved bits); the upper four bits of the second byte are + zero. Take lane 0 of the 128-bit MurmurHash3 result as a signed int64, left-shift it by 12 with + two's-complement 64-bit wraparound, apply signed absolute value (leaving `INT64_MIN` unchanged), + then mask with `0xfffffffffffff000`. The final header is the masked hash bits OR-ed with the low + 12 header bits. #### TypeDef body diff --git a/go/fory/type_def.go b/go/fory/type_def.go index e1ab7c3eb4..7520c42a6a 100644 --- a/go/fory/type_def.go +++ b/go/fory/type_def.go @@ -35,7 +35,7 @@ const ( /* TypeDef represents a transportable value object containing type information and field definitions. typeDef are layout as following: - - first 8 bytes: global header (52 bits hash + 1 bit compress flag + 8 bits meta size) + - first 8 bytes: global header (52 bits metadata hash + 3 bits reserved + 1 bit compress flag + 8 bits meta size) - next 1 byte: kind header - next variable bytes: type id (varint) or ns name + type name - next variable bytes: field definitions (see below) @@ -286,7 +286,7 @@ func readTypeDef(fory *Fory, buffer *ByteBuffer, header int64, err *Error) *Type func skipTypeDef(buffer *ByteBuffer, header int64, err *Error) { // Header-cache hits intentionally treat the current body as opaque bytes and skip by the size in // the current header. Parsed TypeDefs are published to the cache only after successful body parse - // and 52-bit body-hash validation; cache hits must not reparse or rehash that body. + // and 52-bit metadata-hash validation; cache hits must not reparse or rehash that body. sz := int(header & META_SIZE_MASK) if sz == META_SIZE_MASK { sz += int(buffer.ReadVarUint32(err)) @@ -672,7 +672,7 @@ func getFieldNameEncodingIndex(encoding meta.Encoding) int { /* encodingTypeDef encodes a TypeDef into binary format according to the specification typeDef are layout as following: -- first 8 bytes: global header (52 bits hash + 1 bit compress flag + 8 bits meta size) +- first 8 bytes: global header (52 bits metadata hash + 3 bits reserved + 1 bit compress flag + 8 bits meta size) - next 1 byte: kind header - next variable bytes: type id (varint) or ns name + type name - next variable bytes: field defs (see below) @@ -806,20 +806,15 @@ func encodingTypeDef(typeResolver *TypeResolver, typeDef *TypeDef) ([]byte, erro // prependGlobalHeader writes the 8-byte global header func prependGlobalHeader(buffer *ByteBuffer, isCompressed bool) (*ByteBuffer, error) { - var header uint64 metaSize := buffer.WriterIndex() - - header |= typeDefHeaderHash(buffer.GetByteSlice(0, metaSize)) - - if isCompressed { - header |= COMPRESS_META_FLAG + headerLowBits := uint64(metaSize) + if metaSize >= META_SIZE_MASK { + headerLowBits = META_SIZE_MASK } - - if metaSize < META_SIZE_MASK { - header |= uint64(metaSize) & META_SIZE_MASK - } else { - header |= META_SIZE_MASK // Set to max value, actual size will follow + if isCompressed { + headerLowBits |= COMPRESS_META_FLAG } + header := typeDefHeaderHash(buffer.GetByteSlice(0, metaSize), headerLowBits) | headerLowBits result := NewByteBuffer(make([]byte, metaSize+8)) result.WriteInt64(int64(header)) @@ -990,7 +985,7 @@ func writeFieldDef(typeResolver *TypeResolver, buffer *ByteBuffer, field FieldDe /* decodeTypeDef decodes a TypeDef from the buffer typeDef are layout as following: - - first 8 bytes: global header (52 bits hash + 1 bit compress flag + 8 bits meta size) + - first 8 bytes: global header (52 bits metadata hash + 3 bits reserved + 1 bit compress flag + 8 bits meta size) - next 1 byte: kind header - next variable bytes: type id (varint) or ns name + type name - next variable bytes: field definitions (see below) @@ -1275,8 +1270,12 @@ func buildTypeDefEncoded(header int64, metaSizeBits, extraMetaSize int, metaByte return buffer.Bytes() } -func typeDefHeaderHash(data []byte) uint64 { - hash := int64(Murmur3Sum64WithSeed(data, 47) << (64 - NUM_HASH_BITS)) +func typeDefHeaderHash(data []byte, headerLowBits uint64) uint64 { + hashInput := make([]byte, len(data)+2) + copy(hashInput, data) + hashInput[len(data)] = byte(headerLowBits) + hashInput[len(data)+1] = byte(headerLowBits >> 8) + hash := int64(Murmur3Sum64WithSeed(hashInput, 47) << (64 - NUM_HASH_BITS)) if hash < 0 { hash = -hash } @@ -1295,7 +1294,7 @@ func validateParsedTypeDefHash(header int64, metaSizeBits, extraMetaSize int, en } hashMask := ^uint64(0) hashMask <<= uint(64 - NUM_HASH_BITS) - expectedHeaderHash := typeDefHeaderHash(encoded) + expectedHeaderHash := typeDefHeaderHash(encoded, uint64(header)&^hashMask) actualHeaderHash := uint64(header) & hashMask if expectedHeaderHash != actualHeaderHash { return fmt.Errorf("invalid TypeDef metadata hash") diff --git a/go/fory/type_def_test.go b/go/fory/type_def_test.go index 37b465001a..ef1627d7bd 100644 --- a/go/fory/type_def_test.go +++ b/go/fory/type_def_test.go @@ -418,6 +418,25 @@ func TestTypeDefRejectsMetadataHashMismatch(t *testing.T) { require.Contains(t, err.Error(), "metadata hash") } +func TestTypeDefHeaderHashIncludesHeaderLowBits(t *testing.T) { + fory := NewFory() + body := typeDefTestBodyWithoutFields() + _, header := typeDefTestFrame(t, body, false) + + hashMask := ^uint64(0) + hashMask <<= uint(64 - NUM_HASH_BITS) + bodyOnlyHash := bodyOnlyTypeDefHeaderHash(body) + require.NotEqual(t, uint64(header)&hashMask, bodyOnlyHash) + rewrittenHeader := int64(bodyOnlyHash | (uint64(header) &^ hashMask)) + buffer := NewByteBuffer(nil) + buffer.WriteBinary(body) + buffer.SetReaderIndex(0) + + _, err := decodeTypeDef(fory, buffer, rewrittenHeader) + require.Error(t, err) + require.Contains(t, err.Error(), "metadata hash") +} + func TestTypeDefRejectsEncodedMetadataAboveMaxBinarySize(t *testing.T) { fory := NewFory(WithMaxBinarySize(1)) body := typeDefTestBodyWithoutFields() @@ -523,6 +542,16 @@ func TestTypeDefRejectsFieldNameLengthBeyondMetadata(t *testing.T) { require.Contains(t, err.Error(), "field name length") } +func bodyOnlyTypeDefHeaderHash(data []byte) uint64 { + hash := int64(Murmur3Sum64WithSeed(data, 47) << (64 - NUM_HASH_BITS)) + if hash < 0 { + hash = -hash + } + hashMask := ^uint64(0) + hashMask <<= uint(64 - NUM_HASH_BITS) + return uint64(hash) & hashMask +} + // TestTypeDefNestedRecursionStackOverflowPanic verifies that readFieldTypeWithFlags // rejects a crafted payload with 20 million nested LIST types, returning an error // at depth 64 instead of recursing until a goroutine stack overflow crashes the process. diff --git a/go/fory/type_resolver.go b/go/fory/type_resolver.go index 3838e67e25..1090871700 100644 --- a/go/fory/type_resolver.go +++ b/go/fory/type_resolver.go @@ -1636,7 +1636,7 @@ func (r *TypeResolver) readSharedTypeMeta(buffer *ByteBuffer, err *Error) *TypeI var td *TypeDef if existingTd, exists := r.defIdToTypeDef[id]; exists { // Header-cache hits intentionally skip without rehashing. Entries reach this cache only - // after a successful TypeDef parse and 52-bit body-hash validation. + // after a successful TypeDef parse and 52-bit metadata-hash validation. skipTypeDef(buffer, id, err) td = existingTd } else { diff --git a/java/fory-core/src/main/java/org/apache/fory/meta/NativeTypeDefDecoder.java b/java/fory-core/src/main/java/org/apache/fory/meta/NativeTypeDefDecoder.java index b4f50c2c0d..a4d1256017 100644 --- a/java/fory-core/src/main/java/org/apache/fory/meta/NativeTypeDefDecoder.java +++ b/java/fory-core/src/main/java/org/apache/fory/meta/NativeTypeDefDecoder.java @@ -38,7 +38,6 @@ import org.apache.fory.resolver.TypeResolver; import org.apache.fory.serializer.UnknownClass; import org.apache.fory.type.Types; -import org.apache.fory.util.MurmurHash3; import org.apache.fory.util.Preconditions; /** @@ -259,10 +258,9 @@ static void validateParsedTypeDefHash(long id, byte[] encoded) { if (encoded.length - bodyOffset != size) { throw new DeserializationException("Invalid TypeDef encoded size"); } - long hash = MurmurHash3.murmurhash3_x64_128(encoded, bodyOffset, size, 47)[0]; - hash <<= (Long.SIZE - TypeDef.NUM_HASH_BITS); long hashMask = -1L << (Long.SIZE - TypeDef.NUM_HASH_BITS); - long expectedHeaderHash = Math.abs(hash) & hashMask; + long expectedHeaderHash = + NativeTypeDefEncoder.computeTypeDefHashBits(encoded, bodyOffset, size, id & ~hashMask); long actualHeaderHash = id & hashMask; if (expectedHeaderHash != actualHeaderHash) { throw new DeserializationException("Invalid TypeDef metadata hash"); diff --git a/java/fory-core/src/main/java/org/apache/fory/meta/NativeTypeDefEncoder.java b/java/fory-core/src/main/java/org/apache/fory/meta/NativeTypeDefEncoder.java index 6173a9e88d..4ef3979d9b 100644 --- a/java/fory-core/src/main/java/org/apache/fory/meta/NativeTypeDefEncoder.java +++ b/java/fory-core/src/main/java/org/apache/fory/meta/NativeTypeDefEncoder.java @@ -238,14 +238,12 @@ private static boolean hasFieldMetadata(Map> classLayers static MemoryBuffer prependHeader(MemoryBuffer buffer, boolean isCompressed) { int metaSize = buffer.writerIndex(); - long hash = MurmurHash3.murmurhash3_x64_128(buffer.getHeapMemory(), 0, metaSize, 47)[0]; - hash <<= (64 - NUM_HASH_BITS); - // this id will be part of generated codec, a negative number won't be allowed in class name. - long header = Math.abs(hash); + long headerLowBits = Math.min(metaSize, META_SIZE_MASKS); if (isCompressed) { - header |= COMPRESS_META_FLAG; + headerLowBits |= COMPRESS_META_FLAG; } - header |= Math.min(metaSize, META_SIZE_MASKS); + long header = + computeTypeDefHashBits(buffer.getHeapMemory(), 0, metaSize, headerLowBits) | headerLowBits; MemoryBuffer result = MemoryUtils.buffer(metaSize + 8); result.writeInt64(header); if (metaSize >= META_SIZE_MASKS) { @@ -255,6 +253,18 @@ static MemoryBuffer prependHeader(MemoryBuffer buffer, boolean isCompressed) { return result; } + static long computeTypeDefHashBits(byte[] bytes, int offset, int size, long headerLowBits) { + byte[] hashInput = new byte[size + Short.BYTES]; + System.arraycopy(bytes, offset, hashInput, 0, size); + hashInput[size] = (byte) headerLowBits; + hashInput[size + 1] = (byte) (headerLowBits >>> Byte.SIZE); + long hash = MurmurHash3.murmurhash3_x64_128(hashInput, 0, hashInput.length, 47)[0]; + hash <<= (64 - NUM_HASH_BITS); + long hashMask = -1L << (Long.SIZE - NUM_HASH_BITS); + // this id will be part of generated codec, a negative number won't be allowed in class name. + return Math.abs(hash) & hashMask; + } + static int nativeKindCode(int typeId) { switch (typeId) { case Types.STRUCT: diff --git a/java/fory-core/src/main/java/org/apache/fory/meta/TypeDef.java b/java/fory-core/src/main/java/org/apache/fory/meta/TypeDef.java index 6069472e91..838e7f36d6 100644 --- a/java/fory-core/src/main/java/org/apache/fory/meta/TypeDef.java +++ b/java/fory-core/src/main/java/org/apache/fory/meta/TypeDef.java @@ -116,10 +116,8 @@ public class TypeDef implements Serializable { public static void skipTypeDef(MemoryBuffer buffer, long id) { // Header-cache hits intentionally treat the current body as opaque bytes and skip by the size - // in - // the current header. Parsed TypeDefs are published to the cache only after successful body - // parse - // and 52-bit body-hash validation; cache hits must not reparse or rehash that body. + // in the current header. Parsed TypeDefs are published to the cache only after successful body + // parse and 52-bit metadata-hash validation; cache hits must not reparse or rehash that body. int size = (int) (id & META_SIZE_MASKS); if (size == META_SIZE_MASKS) { int extendedSize = buffer.readVarUInt32Small14(); diff --git a/java/fory-core/src/main/java/org/apache/fory/resolver/TypeResolver.java b/java/fory-core/src/main/java/org/apache/fory/resolver/TypeResolver.java index c5856d6d21..4c690c9a02 100644 --- a/java/fory-core/src/main/java/org/apache/fory/resolver/TypeResolver.java +++ b/java/fory-core/src/main/java/org/apache/fory/resolver/TypeResolver.java @@ -761,7 +761,7 @@ protected final TypeInfo readTypeInfoFromBytes( simpleClassNameBytes = metaStringReader.readMetaString(buffer, typeNameBytesCache); // MetaStringReader returns the provided cache object only when the wire identity matches. For - // big meta strings, body-hash validation happens before the entry is first cached. + // big meta strings, metadata-hash validation happens before the entry is first cached. if (typeNameBytesCache == simpleClassNameBytes && packageNameBytesCache == namespaceBytes) { return typeInfoCache; } @@ -793,7 +793,7 @@ protected final TypeInfo readSharedClassMeta(ReadContext readContext) { } else { // New type in stream, with optimized reuse by validated TypeDef header. A header-cache // hit intentionally skips the body without rehashing: entries are published only after the - // TypeDef body has parsed successfully and matched the 52-bit body hash. + // TypeDef body has parsed successfully and matched the 52-bit metadata hash. long id = buffer.readInt64(); typeInfo = extRegistry.typeInfoByTypeDefId.get(id); if (typeInfo != null) { diff --git a/java/fory-core/src/test/java/org/apache/fory/meta/NativeTypeDefEncoderTest.java b/java/fory-core/src/test/java/org/apache/fory/meta/NativeTypeDefEncoderTest.java index 22d988dfcf..0a6bd94679 100644 --- a/java/fory-core/src/test/java/org/apache/fory/meta/NativeTypeDefEncoderTest.java +++ b/java/fory-core/src/test/java/org/apache/fory/meta/NativeTypeDefEncoderTest.java @@ -37,6 +37,7 @@ import org.apache.fory.test.bean.MapFields; import org.apache.fory.test.bean.Struct; import org.apache.fory.type.Types; +import org.apache.fory.util.MurmurHash3; import org.testng.Assert; import org.testng.annotations.Test; @@ -258,6 +259,17 @@ public void testDecodeRejectsParsedTypeDefWithMismatchedHash() { () -> TypeDef.readTypeDef(fory.getTypeResolver(), MemoryBuffer.fromByteArray(malformed))); } + @Test + public void testDecodeRejectsBodyOnlyHeaderHash() { + Fory fory = Fory.builder().withMetaShare(true).build(); + TypeDef typeDef = TypeDef.buildTypeDef(fory.getTypeResolver(), Foo1.class); + byte[] malformed = rewriteHeaderWithBodyOnlyHash(typeDef); + + Assert.assertThrows( + DeserializationException.class, + () -> TypeDef.readTypeDef(fory.getTypeResolver(), MemoryBuffer.fromByteArray(malformed))); + } + @Test public void testDecodeRejectsHashConsistentMalformedTypeDefBody() { Fory fory = Fory.builder().withMetaShare(true).build(); @@ -277,6 +289,35 @@ private static byte[] corruptEncodedBody(TypeDef typeDef, String needle) { return malformed; } + private static byte[] rewriteHeaderWithBodyOnlyHash(TypeDef typeDef) { + byte[] malformed = typeDef.getEncoded().clone(); + MemoryBuffer buffer = MemoryBuffer.fromByteArray(malformed); + long header = buffer.readInt64(); + int bodyOffset = typeDefBodyOffset(malformed); + int size = malformed.length - bodyOffset; + long hashMask = -1L << (Long.SIZE - TypeDef.NUM_HASH_BITS); + long bodyOnlyHash = bodyOnlyTypeDefHashBits(malformed, bodyOffset, size); + Assert.assertNotEquals(header & hashMask, bodyOnlyHash); + MemoryBuffer.fromByteArray(malformed).putInt64(0, bodyOnlyHash | (header & ~hashMask)); + return malformed; + } + + private static long bodyOnlyTypeDefHashBits(byte[] bytes, int offset, int size) { + long hash = MurmurHash3.murmurhash3_x64_128(bytes, offset, size, 47)[0]; + hash <<= (Long.SIZE - TypeDef.NUM_HASH_BITS); + long hashMask = -1L << (Long.SIZE - TypeDef.NUM_HASH_BITS); + return Math.abs(hash) & hashMask; + } + + private static int typeDefBodyOffset(byte[] encoded) { + MemoryBuffer buffer = MemoryBuffer.fromByteArray(encoded); + long header = buffer.readInt64(); + if ((header & TypeDef.META_SIZE_MASKS) == TypeDef.META_SIZE_MASKS) { + buffer.readVarUInt32Small14(); + } + return buffer.readerIndex(); + } + private static int indexOf(byte[] bytes, byte[] needle, int fromIndex) { for (int i = fromIndex; i <= bytes.length - needle.length; i++) { boolean match = true; diff --git a/java/fory-core/src/test/java/org/apache/fory/meta/TypeDefEncoderTest.java b/java/fory-core/src/test/java/org/apache/fory/meta/TypeDefEncoderTest.java index c62bbee1da..a4978d4c76 100644 --- a/java/fory-core/src/test/java/org/apache/fory/meta/TypeDefEncoderTest.java +++ b/java/fory-core/src/test/java/org/apache/fory/meta/TypeDefEncoderTest.java @@ -30,6 +30,7 @@ import org.apache.fory.memory.MemoryBuffer; import org.apache.fory.resolver.TypeResolver; import org.apache.fory.type.Types; +import org.apache.fory.util.MurmurHash3; import org.testng.Assert; import org.testng.annotations.Test; @@ -483,6 +484,18 @@ public void testDecodeRejectsParsedTypeDefWithMismatchedHash() { () -> TypeDef.readTypeDef(fory.getTypeResolver(), MemoryBuffer.fromByteArray(malformed))); } + @Test + public void testDecodeRejectsBodyOnlyHeaderHash() { + Fory fory = Fory.builder().withXlang(true).withCompatible(false).withMetaShare(true).build(); + fory.register(ClassWithNoAnnotations.class); + TypeDef typeDef = TypeDef.buildTypeDef(fory.getTypeResolver(), ClassWithNoAnnotations.class); + byte[] malformed = rewriteHeaderWithBodyOnlyHash(typeDef); + + Assert.assertThrows( + DeserializationException.class, + () -> TypeDef.readTypeDef(fory.getTypeResolver(), MemoryBuffer.fromByteArray(malformed))); + } + @Test public void testDecodeRejectsHashConsistentMalformedTypeDefBody() { Fory fory = Fory.builder().withXlang(true).withCompatible(false).withMetaShare(true).build(); @@ -537,6 +550,26 @@ private static byte[] corruptEncodedBody(TypeDef typeDef, String needle) { return malformed; } + private static byte[] rewriteHeaderWithBodyOnlyHash(TypeDef typeDef) { + byte[] malformed = typeDef.getEncoded().clone(); + MemoryBuffer buffer = MemoryBuffer.fromByteArray(malformed); + long header = buffer.readInt64(); + int bodyOffset = typeDefBodyOffset(malformed); + int size = malformed.length - bodyOffset; + long hashMask = -1L << (Long.SIZE - TypeDef.NUM_HASH_BITS); + long bodyOnlyHash = bodyOnlyTypeDefHashBits(malformed, bodyOffset, size); + Assert.assertNotEquals(header & hashMask, bodyOnlyHash); + MemoryBuffer.fromByteArray(malformed).putInt64(0, bodyOnlyHash | (header & ~hashMask)); + return malformed; + } + + private static long bodyOnlyTypeDefHashBits(byte[] bytes, int offset, int size) { + long hash = MurmurHash3.murmurhash3_x64_128(bytes, offset, size, 47)[0]; + hash <<= (Long.SIZE - TypeDef.NUM_HASH_BITS); + long hashMask = -1L << (Long.SIZE - TypeDef.NUM_HASH_BITS); + return Math.abs(hash) & hashMask; + } + private static int indexOf(byte[] bytes, byte[] needle, int fromIndex) { for (int i = fromIndex; i <= bytes.length - needle.length; i++) { boolean match = true; diff --git a/javascript/packages/core/lib/context.ts b/javascript/packages/core/lib/context.ts index 956c904458..0f59afe694 100644 --- a/javascript/packages/core/lib/context.ts +++ b/javascript/packages/core/lib/context.ts @@ -716,7 +716,7 @@ export class ReadContext { let typeMeta: TypeMeta; if (cached) { // Header-cache hits intentionally skip without rehashing. Entries reach this cache only - // after a successful TypeMeta parse and 52-bit body-hash validation. The current body + // after a successful TypeMeta parse and 52-bit metadata-hash validation. The current body // size still comes from the current header bytes, not from the cached TypeMeta. TypeMeta.skipBodyByHeaderLow(this.reader, headerLow); typeMeta = cached; diff --git a/javascript/packages/core/lib/meta/TypeMeta.ts b/javascript/packages/core/lib/meta/TypeMeta.ts index 321b26ec29..3bec551681 100644 --- a/javascript/packages/core/lib/meta/TypeMeta.ts +++ b/javascript/packages/core/lib/meta/TypeMeta.ts @@ -552,7 +552,10 @@ export class TypeMeta { } private static validateParsedBodyHash(header: bigint, body: Uint8Array) { - const expectedHeaderHash = TypeMeta.headerHashBits(body); + const expectedHeaderHash = TypeMeta.headerHashBits( + body, + header & ~HEADER_HASH_MASK, + ); const actualHeaderHash = header & HEADER_HASH_MASK; if (expectedHeaderHash !== actualHeaderHash) { throw new Error("TypeMeta metadata hash mismatch"); @@ -954,19 +957,24 @@ export class TypeMeta { } private static buildHeader(buffer: Uint8Array, isCompressed: boolean) { - let header = TypeMeta.headerHashBits(buffer); + let headerLowBits = BigInt(Math.min(buffer.length, META_SIZE_MASKS)); if (isCompressed) { - header |= COMPRESS_META_FLAG; + headerLowBits |= COMPRESS_META_FLAG; } - header |= BigInt(Math.min(buffer.length, META_SIZE_MASKS)); + const header = TypeMeta.headerHashBits(buffer, headerLowBits) + | headerLowBits; return { header: BigInt.asUintN(64, header), headerHash: Number(header >> HASH_SHIFT_BITS), }; } - private static headerHashBits(buffer: Uint8Array) { - const hash = x64hash128(buffer, 47); + private static headerHashBits(buffer: Uint8Array, headerLowBits: bigint) { + const hashInput = new Uint8Array(buffer.length + 2); + hashInput.set(buffer); + hashInput[buffer.length] = Number(headerLowBits & 0xffn); + hashInput[buffer.length + 1] = Number((headerLowBits >> 8n) & 0xffn); + const hash = x64hash128(hashInput, 47); // Read the high 64 bits of the 128-bit MurmurHash3 as a SIGNED // int64 to match pyfory (`hash_buffer()[0]` unpacks `int64_t[0]`), // java (`murmurhash3_x64_128(...)[0]` returns `long`), and rust diff --git a/javascript/test/typemeta.test.ts b/javascript/test/typemeta.test.ts index 29de8dda2b..66fba3e69e 100644 --- a/javascript/test/typemeta.test.ts +++ b/javascript/test/typemeta.test.ts @@ -25,6 +25,7 @@ import Fory, { } from "../packages/core/index"; import { ReadContext } from "../packages/core/lib/context"; import { TypeMeta } from "../packages/core/lib/meta/TypeMeta"; +import { x64hash128 } from "../packages/core/lib/murmurHash3"; import { BinaryReader } from "../packages/core/lib/reader"; import { BinaryWriter } from "../packages/core/lib/writer"; import { describe, expect, test } from "@jest/globals"; @@ -33,6 +34,9 @@ const COMPRESS_META_FLAG = 1n << 8n; const RESERVED_META_FLAGS = 0b111n << 9n; const META_SIZE_MASK = 0xffn; const HASH_SHIFT_BITS = 12n; +const LOW_HEADER_BITS_MASK = (1n << HASH_SHIFT_BITS) - 1n; +const UINT64_MASK = (1n << 64n) - 1n; +const HEADER_HASH_MASK = UINT64_MASK ^ LOW_HEADER_BITS_MASK; describe("typemeta", () => { test("writes TypeMeta header bits in the xlang layout", () => { @@ -108,6 +112,36 @@ describe("typemeta", () => { expect(skipReader.readGetCursor()).toBe(bytes.length); }); + test("includes TypeMeta header low bits in the metadata hash", () => { + const bytes = TypeMeta.fromTypeInfo( + Type.struct(7007, { + value: Type.string().setId(1), + }), + ).toBytes(); + const malformed = new Uint8Array(bytes); + const view = new DataView( + malformed.buffer, + malformed.byteOffset, + malformed.byteLength, + ); + const header = view.getBigUint64(0, true); + const bodyOffset = typeMetaBodyOffset(bytes); + const bodyOnlyHash = bodyOnlyHeaderHashBits(bytes.subarray(bodyOffset)); + expect(header & HEADER_HASH_MASK).not.toBe(bodyOnlyHash); + + view.setBigUint64( + 0, + bodyOnlyHash | (header & LOW_HEADER_BITS_MASK), + true, + ); + const reader = new BinaryReader({}); + reader.reset(malformed); + + expect(() => TypeMeta.fromBytes(reader)).toThrow( + "TypeMeta metadata hash mismatch", + ); + }); + test("TypeMeta header cache hit skips the current body size", () => { const header = 0xffn; const typeMeta = TypeMeta.fromTypeInfo(Type.struct(7010, {})); @@ -643,3 +677,15 @@ function typeMetaBodyOffset(bytes: Uint8Array) { } return reader.readGetCursor(); } + +function bodyOnlyHeaderHashBits(buffer: Uint8Array) { + const hash = x64hash128(buffer, 47); + let header = BigInt.asIntN( + 64, + hash.getBigInt64(0, false) << HASH_SHIFT_BITS, + ); + if (header < 0n) { + header = -header; + } + return BigInt.asUintN(64, header) & HEADER_HASH_MASK; +} diff --git a/python/pyfory/meta/typedef.py b/python/pyfory/meta/typedef.py index 97551acc4b..cb3057962f 100644 --- a/python/pyfory/meta/typedef.py +++ b/python/pyfory/meta/typedef.py @@ -121,8 +121,9 @@ def xlang_non_struct_type_id(kind_code: int) -> int: raise ValueError(f"Unsupported TypeDef kind code {kind_code}") from exc -def _typedef_header_hash(encoded: bytes) -> int: - hash_value = hash_buffer(encoded, 47)[0] +def _typedef_header_hash(encoded: bytes, header_low_bits: int) -> int: + hash_input = encoded + bytes((header_low_bits & 0xFF, (header_low_bits >> 8) & 0xFF)) + hash_value = hash_buffer(hash_input, 47)[0] shifted = (hash_value << TYPEDEF_HASH_SHIFT) & _UINT64_MASK if shifted >= (1 << 63): shifted -= 1 << 64 diff --git a/python/pyfory/meta/typedef_decoder.py b/python/pyfory/meta/typedef_decoder.py index f221b41469..3f2b224c1c 100644 --- a/python/pyfory/meta/typedef_decoder.py +++ b/python/pyfory/meta/typedef_decoder.py @@ -45,6 +45,7 @@ is_named_typedef_kind, xlang_non_struct_type_id, _typedef_header_hash, + _UINT64_MASK, ) from pyfory.types import TypeId from pyfory._fory import NO_USER_TYPE_ID @@ -204,7 +205,8 @@ def decode_typedef(buffer: Buffer, resolver, header=None) -> TypeDef: def _validate_parsed_typedef_hash(header: int, encoded_meta_data: bytes) -> None: - if _typedef_header_hash(encoded_meta_data) != (header & TYPEDEF_HASH_MASK): + header_bits = header & _UINT64_MASK + if _typedef_header_hash(encoded_meta_data, header_bits & ~TYPEDEF_HASH_MASK) != (header_bits & TYPEDEF_HASH_MASK): raise ValueError("Invalid TypeDef metadata hash") diff --git a/python/pyfory/meta/typedef_encoder.py b/python/pyfory/meta/typedef_encoder.py index 69d83ba615..17051f7c7b 100644 --- a/python/pyfory/meta/typedef_encoder.py +++ b/python/pyfory/meta/typedef_encoder.py @@ -132,11 +132,11 @@ def encode_typedef(type_resolver, cls, include_fields: bool = True): def prepend_header(buffer: bytes, is_compressed: bool): """Prepend header to the buffer.""" meta_size = len(buffer) - header = _typedef_header_hash(buffer) + header_low_bits = min(meta_size, META_SIZE_MASKS) if is_compressed: - header |= COMPRESS_META_FLAG + header_low_bits |= COMPRESS_META_FLAG - header |= min(meta_size, META_SIZE_MASKS) + header = _typedef_header_hash(buffer, header_low_bits) | header_low_bits if header >= (1 << 63): header -= 1 << 64 result = Buffer.allocate(meta_size + 8) diff --git a/python/pyfory/registry.py b/python/pyfory/registry.py index 50a869319e..8a4cb030f7 100644 --- a/python/pyfory/registry.py +++ b/python/pyfory/registry.py @@ -1162,7 +1162,7 @@ def _read_and_build_type_info(self, buffer): type_info = self._meta_shared_type_info.get(header) if type_info is not None: # Header-cache hits intentionally skip without rehashing. Entries reach this cache only - # after a successful TypeDef parse and 52-bit body-hash validation. + # after a successful TypeDef parse and 52-bit metadata-hash validation. skip_typedef(buffer, header) return type_info type_def = decode_typedef(buffer, self, header=header) diff --git a/python/pyfory/serialization.pyx b/python/pyfory/serialization.pyx index 004ba5b6eb..733156cbc9 100644 --- a/python/pyfory/serialization.pyx +++ b/python/pyfory/serialization.pyx @@ -561,7 +561,7 @@ cdef class TypeResolver: cdef object type_def if typeinfo is not None: # Header-cache hits intentionally skip without rehashing. Entries reach this cache only - # after a successful TypeDef parse and 52-bit body-hash validation. + # after a successful TypeDef parse and 52-bit metadata-hash validation. _skip_typedef_fast(buffer, header) return typeinfo type_def = decode_typedef(buffer, self.resolver, header=header) diff --git a/python/pyfory/tests/test_typedef_encoding.py b/python/pyfory/tests/test_typedef_encoding.py index ed93d3fead..e91a42893e 100644 --- a/python/pyfory/tests/test_typedef_encoding.py +++ b/python/pyfory/tests/test_typedef_encoding.py @@ -36,6 +36,11 @@ DynamicFieldType, FIELD_NAME_ENCODINGS, COMPRESS_META_FLAG, + META_SIZE_MASKS, + TYPEDEF_HASH_MASK, + TYPEDEF_HASH_SHIFT, + _INT64_MIN, + _UINT64_MASK, ) from pyfory.meta.typedef_encoder import ( FIELD_NAME_ENCODER, @@ -47,6 +52,7 @@ from pyfory.types import TypeId from pyfory import Fory from pyfory.error import TypeNotCompatibleError +from pyfory.lib.mmh3 import hash_buffer try: import numpy as np @@ -252,6 +258,16 @@ def test_decode_typedef_rejects_parsed_body_with_mismatched_hash(): decode_typedef(Buffer(malformed), fory.type_resolver) +def test_decode_typedef_rejects_body_only_header_hash(): + fory = Fory(xlang=True, compatible=False) + fory.register(SimpleTypeDef, namespace="example", typename="SimpleTypeDef") + typedef = encode_typedef(fory.type_resolver, SimpleTypeDef) + malformed = _rewrite_header_with_body_only_hash(typedef.encoded) + + with pytest.raises(ValueError, match="Invalid TypeDef metadata hash"): + decode_typedef(Buffer(malformed), fory.type_resolver) + + def test_decode_typedef_rejects_hash_consistent_malformed_body(): fory = Fory(xlang=True, compatible=False) encoded = prepend_header(b"\x00", False) @@ -319,11 +335,33 @@ def _corrupt_encoded_field_name(typedef, field_name): def _typedef_body_offset(encoded): buffer = Buffer(encoded) header = buffer.read_int64() - if header & 0xFF == 0xFF: + if header & META_SIZE_MASKS == META_SIZE_MASKS: buffer.read_var_uint32() return buffer.get_reader_index() +def _rewrite_header_with_body_only_hash(encoded): + malformed = bytearray(encoded) + buffer = Buffer(encoded) + header = buffer.read_int64() & _UINT64_MASK + body_offset = _typedef_body_offset(encoded) + body_only_hash = _body_only_typedef_hash_bits(encoded[body_offset:]) + assert header & TYPEDEF_HASH_MASK != body_only_hash + rewritten_header = body_only_hash | (header & ~TYPEDEF_HASH_MASK) + malformed[:8] = rewritten_header.to_bytes(8, "little", signed=False) + return bytes(malformed) + + +def _body_only_typedef_hash_bits(encoded_body): + hash_value = hash_buffer(encoded_body, 47)[0] + shifted = (hash_value << TYPEDEF_HASH_SHIFT) & _UINT64_MASK + if shifted >= (1 << 63): + shifted -= 1 << 64 + if shifted != _INT64_MIN and shifted < 0: + shifted = -shifted + return (shifted & _UINT64_MASK) & TYPEDEF_HASH_MASK + + def test_nested_container_typedef_preserves_declared_encoding(): fory = Fory(xlang=True, compatible=False) fory.register(NestedEncodingTypeDef, namespace="example", typename="NestedEncodingTypeDef") diff --git a/rust/fory-core/src/meta/type_meta.rs b/rust/fory-core/src/meta/type_meta.rs index 7b3b8ef378..02b96ba045 100644 --- a/rust/fory-core/src/meta/type_meta.rs +++ b/rust/fory-core/src/meta/type_meta.rs @@ -167,15 +167,20 @@ fn read_type_meta_body_size(reader: &mut Reader, header: i64) -> Result u64 { - let hash_value = murmurhash3_x64_128(body, 47).0 as i64; +fn type_meta_hash_bits(body: &[u8], header_low_bits: u64) -> u64 { + let mut hash_input = Vec::with_capacity(body.len() + 2); + hash_input.extend_from_slice(body); + hash_input.push(header_low_bits as u8); + hash_input.push((header_low_bits >> 8) as u8); + let hash_value = murmurhash3_x64_128(&hash_input, 47).0 as i64; hash_value.wrapping_shl(TYPE_META_HASH_SHIFT).wrapping_abs() as u64 } #[inline(always)] fn validate_type_meta_body_hash(header: i64, body: &[u8]) -> Result<(), Error> { let hash_mask = u64::MAX << TYPE_META_HASH_SHIFT; - if ((header as u64) & hash_mask) != (type_meta_hash_bits(body) & hash_mask) { + let expected_hash = type_meta_hash_bits(body, (header as u64) & !hash_mask); + if ((header as u64) & hash_mask) != (expected_hash & hash_mask) { return Err(Error::invalid_data("TypeMeta metadata hash mismatch")); } Ok(()) @@ -1136,7 +1141,8 @@ impl TypeMeta { if is_compressed { header |= COMPRESS_META_FLAG; } - let meta_hash_shifted = type_meta_hash_bits(meta_writer.dump().as_slice()) as i64; + let meta_hash_shifted = + type_meta_hash_bits(meta_writer.dump().as_slice(), header as u64) as i64; let meta_hash = meta_hash_shifted >> TYPE_META_HASH_SHIFT; header |= meta_hash_shifted; result.write_i64(header); @@ -1176,6 +1182,34 @@ mod tests { assert!(message.contains("hash mismatch")); } + #[test] + fn rejects_body_only_header_hash() { + let meta = TypeMeta::new( + STRUCT, + 1, + MetaString::get_empty().clone(), + MetaString::get_empty().clone(), + false, + vec![], + ) + .unwrap(); + let (mut bytes, _) = meta.to_bytes().unwrap(); + let header = i64::from_le_bytes(bytes[0..8].try_into().unwrap()) as u64; + let hash_mask = u64::MAX << TYPE_META_HASH_SHIFT; + let body_only_hash = body_only_type_meta_hash_bits(&bytes[8..]); + assert_ne!(header & hash_mask, body_only_hash); + let rewritten_header = body_only_hash | (header & !hash_mask); + bytes[0..8].copy_from_slice(&(rewritten_header as i64).to_le_bytes()); + + let mut reader = Reader::new(&bytes); + let result = TypeMeta::from_bytes(&mut reader, &TypeResolver::default()); + let message = result + .err() + .map(|error| error.to_string()) + .unwrap_or_default(); + assert!(message.contains("hash mismatch")); + } + #[test] fn rejects_hash_consistent_trailing_body_bytes() { let meta = TypeMeta::new( @@ -1193,8 +1227,9 @@ mod tests { let mut frame = vec![]; let mut writer = Writer::from_buffer(&mut frame); let body_size = body.len() as i64; - let mut header = type_meta_hash_bits(&body) as i64; - header |= min(META_SIZE_MASK, body_size); + let header_low_bits = min(META_SIZE_MASK, body_size); + let mut header = type_meta_hash_bits(&body, header_low_bits as u64) as i64; + header |= header_low_bits; writer.write_i64(header); if body_size >= META_SIZE_MASK { writer.write_var_u32((body_size - META_SIZE_MASK) as u32); @@ -1209,4 +1244,10 @@ mod tests { .unwrap_or_default(); assert!(message.contains("metadata size")); } + + fn body_only_type_meta_hash_bits(body: &[u8]) -> u64 { + let hash_value = murmurhash3_x64_128(body, 47).0 as i64; + let shifted = hash_value << TYPE_META_HASH_SHIFT; + shifted.wrapping_abs() as u64 & (u64::MAX << TYPE_META_HASH_SHIFT) + } } diff --git a/rust/fory-core/src/resolver/meta_resolver.rs b/rust/fory-core/src/resolver/meta_resolver.rs index 260a651615..103c2fdb99 100644 --- a/rust/fory-core/src/resolver/meta_resolver.rs +++ b/rust/fory-core/src/resolver/meta_resolver.rs @@ -149,14 +149,14 @@ impl MetaReaderResolver { .filter(|_| self.last_meta_header == meta_header) { // Header-cache hits intentionally skip without rehashing. Entries reach this cache - // only after a successful TypeMeta parse and 52-bit body-hash validation. + // only after a successful TypeMeta parse and 52-bit metadata-hash validation. self.reading_type_infos.push(type_info.clone()); TypeMeta::skip_bytes_for_validated_header(reader, meta_header)?; return Ok(type_info.clone()); } if let Some(type_info) = self.parsed_type_infos.get(&meta_header) { // Header-cache hits intentionally skip without rehashing. Entries reach this cache - // only after a successful TypeMeta parse and 52-bit body-hash validation. + // only after a successful TypeMeta parse and 52-bit metadata-hash validation. self.last_meta_header = meta_header; self.last_type_info = Some(type_info.clone()); self.reading_type_infos.push(type_info.clone()); diff --git a/swift/Sources/Fory/ReadContext.swift b/swift/Sources/Fory/ReadContext.swift index 922c170198..6d1967d59f 100644 --- a/swift/Sources/Fory/ReadContext.swift +++ b/swift/Sources/Fory/ReadContext.swift @@ -269,7 +269,7 @@ public final class ReadContext { } if header == localTypeDefHeader { // Header-cache hits intentionally skip without rehashing. Entries reach this - // cache only after a successful TypeDef parse and 52-bit body-hash validation. + // cache only after a successful TypeDef parse and 52-bit metadata-hash validation. compatibleTypeDefTypeInfos.push(localTypeInfo) try buffer.skip(bodySize) return nil @@ -304,7 +304,7 @@ public final class ReadContext { } if let cached = typeResolver.getTypeInfo(forHeader: header) { // Header-cache hits intentionally skip without rehashing. Entries reach this cache only - // after a successful TypeDef parse and 52-bit body-hash validation. + // after a successful TypeDef parse and 52-bit metadata-hash validation. try buffer.skip(bodySize) compatibleTypeDefTypeInfos.push(cached) return cached @@ -341,7 +341,7 @@ public final class ReadContext { if header == localTypeDefHeader { // Header-cache hits intentionally skip without rehashing. Entries reach this - // cache only after a successful TypeDef parse and 52-bit body-hash validation. + // cache only after a successful TypeDef parse and 52-bit metadata-hash validation. compatibleTypeDefTypeInfos.push(localTypeInfo) try buffer.skip(bodySize) return localTypeInfo diff --git a/swift/Sources/Fory/TypeMeta.swift b/swift/Sources/Fory/TypeMeta.swift index 4c123b2bb2..ebb6d78503 100644 --- a/swift/Sources/Fory/TypeMeta.swift +++ b/swift/Sources/Fory/TypeMeta.swift @@ -310,11 +310,11 @@ public final class TypeMeta: Equatable, @unchecked Sendable { } let body = try encodeBody() - var header = Self.typeMetaHeaderHash(body) + var headerLowBits = UInt64(min(body.count, Int(typeMetaSizeMask))) if compressed { - header |= typeMetaCompressedFlag + headerLowBits |= typeMetaCompressedFlag } - header |= UInt64(min(body.count, Int(typeMetaSizeMask))) + let header = Self.typeMetaHeaderHash(body, headerLowBits: headerLowBits) | headerLowBits let buffer = ByteBuffer(capacity: body.count + 16) buffer.writeUInt64(header) @@ -407,7 +407,8 @@ public final class TypeMeta: Equatable, @unchecked Sendable { if bodyReader.remaining != 0 { throw ForyError.invalidData("unexpected trailing bytes in TypeMeta body") } - if (header & Self.hashMask()) != Self.typeMetaHeaderHash(encodedBody) { + if (header & Self.hashMask()) + != Self.typeMetaHeaderHash(encodedBody, headerLowBits: header & ~Self.hashMask()) { throw ForyError.invalidData("invalid TypeMeta metadata hash") } @@ -470,8 +471,11 @@ public final class TypeMeta: Equatable, @unchecked Sendable { UInt64.max << (64 - typeMetaNumHashBits) } - private static func typeMetaHeaderHash(_ body: [UInt8]) -> UInt64 { - let bodyHash = MurmurHash3.x64_128(body, seed: typeMetaHashSeed).0 + private static func typeMetaHeaderHash(_ body: [UInt8], headerLowBits: UInt64) -> UInt64 { + var hashInput = body + hashInput.append(UInt8(truncatingIfNeeded: headerLowBits)) + hashInput.append(UInt8(truncatingIfNeeded: headerLowBits >> 8)) + let bodyHash = MurmurHash3.x64_128(hashInput, seed: typeMetaHashSeed).0 let shifted = bodyHash << (64 - typeMetaNumHashBits) let signed = Int64(bitPattern: shifted) let absSigned = signed == Int64.min ? signed : Swift.abs(signed) diff --git a/swift/Tests/ForyTests/ForySwiftTests.swift b/swift/Tests/ForyTests/ForySwiftTests.swift index dbdf4ce4c3..a6ff197d38 100644 --- a/swift/Tests/ForyTests/ForySwiftTests.swift +++ b/swift/Tests/ForyTests/ForySwiftTests.swift @@ -1280,3 +1280,39 @@ func typeMetaRoundTripByID() throws { #expect(decoded.userTypeID == 101) #expect(decoded.fields.isEmpty) } + +@Test +func typeMetaHeaderHashIncludesHeaderLowBits() throws { + let emptyNamespace = MetaString.empty(specialChar1: ".", specialChar2: "_") + let emptyTypeName = MetaString.empty(specialChar1: "$", specialChar2: "_") + + let meta = try TypeMeta( + typeID: TypeId.structType.rawValue, + userTypeID: 102, + namespace: emptyNamespace, + typeName: emptyTypeName, + registerByName: false, + fields: [] + ) + + var encoded = try meta.encode() + let header = try ByteBuffer(bytes: encoded).readUInt64() + let hashMask = UInt64.max << 12 + let bodyOnlyHash = bodyOnlyTypeMetaHeaderHash(Array(encoded.dropFirst(8))) + #expect((header & hashMask) != bodyOnlyHash) + let rewrittenHeader = bodyOnlyHash | (header & ~hashMask) + for index in 0..<8 { + encoded[index] = UInt8(truncatingIfNeeded: rewrittenHeader >> (index * 8)) + } + + #expect(throws: ForyError.self) { + _ = try TypeMeta.decode(encoded) + } +} + +private func bodyOnlyTypeMetaHeaderHash(_ body: [UInt8]) -> UInt64 { + let shifted = MurmurHash3.x64_128(body, seed: 47).0 << 12 + let signed = Int64(bitPattern: shifted) + let absSigned = signed == Int64.min ? signed : Swift.abs(signed) + return UInt64(bitPattern: absSigned) & (UInt64.max << 12) +}