From e673cb4fd00ebdaa24c9ec094c980ea077283b70 Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Tue, 27 Jan 2026 12:05:03 +0800 Subject: [PATCH] implement simple murmur hash in go and remove deps --- go/fory/go.mod | 1 - go/fory/go.sum | 2 - go/fory/meta_string_resolver.go | 5 +- go/fory/murmur.go | 144 +++++++++++++++++++++++++ go/fory/struct.go | 4 +- go/fory/tests/xlang/xlang_test_main.go | 14 +-- go/fory/type_def.go | 3 +- 7 files changed, 151 insertions(+), 22 deletions(-) create mode 100644 go/fory/murmur.go diff --git a/go/fory/go.mod b/go/fory/go.mod index 8d261fe0f0..a3d03bc1ee 100644 --- a/go/fory/go.mod +++ b/go/fory/go.mod @@ -20,7 +20,6 @@ module github.com/apache/fory/go/fory go 1.24.0 require ( - github.com/spaolacci/murmur3 v1.1.0 github.com/stretchr/testify v1.7.0 golang.org/x/tools v0.39.0 ) diff --git a/go/fory/go.sum b/go/fory/go.sum index 88e696974e..c967dc8cb9 100644 --- a/go/fory/go.sum +++ b/go/fory/go.sum @@ -5,8 +5,6 @@ github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/spaolacci/murmur3 v1.1.0 h1:7c1g84S4BPRrfL5Xrdp6fOJ206sU9y293DDHaoy0bLI= -github.com/spaolacci/murmur3 v1.1.0/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= diff --git a/go/fory/meta_string_resolver.go b/go/fory/meta_string_resolver.go index f1ba921474..f8398b23e0 100644 --- a/go/fory/meta_string_resolver.go +++ b/go/fory/meta_string_resolver.go @@ -22,7 +22,6 @@ import ( "encoding/binary" "fmt" 
"github.com/apache/fory/go/fory/meta" - "github.com/spaolacci/murmur3" ) // Constants for string handling @@ -221,7 +220,7 @@ func (r *MetaStringResolver) GetMetaStrBytes(metastr *meta.MetaString) *MetaStri hashcode = ((v1*31 + v2) >> 8 << 8) | int64(metastr.GetEncoding()) } else { // Large string: use MurmurHash3 - h64 := murmur3.Sum64WithSeed(data, 47) + h64 := Murmur3Sum64WithSeed(data, 47) hashcode = int64((h64 >> 8) << 8) hashcode |= int64(metastr.GetEncoding()) } @@ -249,7 +248,7 @@ func ComputeMetaStringHash(data []byte, encoding meta.Encoding) int64 { hashcode = ((v1*31 + v2) >> 8 << 8) | int64(encoding) } else { // Large string: use MurmurHash3 - h64 := murmur3.Sum64WithSeed(data, 47) + h64 := Murmur3Sum64WithSeed(data, 47) hashcode = int64((h64 >> 8) << 8) hashcode |= int64(encoding) } diff --git a/go/fory/murmur.go b/go/fory/murmur.go new file mode 100644 index 0000000000..cfa68b56d2 --- /dev/null +++ b/go/fory/murmur.go @@ -0,0 +1,144 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package fory + +import "encoding/binary" + +const ( + murmurC1_128 = 0x87c37b91114253d5 + murmurC2_128 = 0x4cf5ad432745937f +) + +func Murmur3Sum64WithSeed(data []byte, seed uint32) uint64 { + h1, _ := Murmur3Sum128WithSeed(data, seed) + return h1 +} + +func MurmurHash3_x64_128(data []byte, seed int64) (uint64, uint64) { + return Murmur3Sum128WithSeed(data, uint32(seed)) +} + +func Murmur3Sum128WithSeed(data []byte, seed uint32) (uint64, uint64) { + h1 := uint64(seed) + h2 := uint64(seed) + + nblocks := len(data) / 16 + for i := 0; i < nblocks; i++ { + block := data[i*16:] + k1 := binary.LittleEndian.Uint64(block) + k2 := binary.LittleEndian.Uint64(block[8:]) + + k1 *= murmurC1_128 + k1 = (k1 << 31) | (k1 >> 33) + k1 *= murmurC2_128 + h1 ^= k1 + + h1 = (h1 << 27) | (h1 >> 37) + h1 += h2 + h1 = h1*5 + 0x52dce729 + + k2 *= murmurC2_128 + k2 = (k2 << 33) | (k2 >> 31) + k2 *= murmurC1_128 + h2 ^= k2 + + h2 = (h2 << 31) | (h2 >> 33) + h2 += h1 + h2 = h2*5 + 0x38495ab5 + } + + tail := data[nblocks*16:] + var k1, k2 uint64 + switch len(tail) & 15 { + case 15: + k2 ^= uint64(tail[14]) << 48 + fallthrough + case 14: + k2 ^= uint64(tail[13]) << 40 + fallthrough + case 13: + k2 ^= uint64(tail[12]) << 32 + fallthrough + case 12: + k2 ^= uint64(tail[11]) << 24 + fallthrough + case 11: + k2 ^= uint64(tail[10]) << 16 + fallthrough + case 10: + k2 ^= uint64(tail[9]) << 8 + fallthrough + case 9: + k2 ^= uint64(tail[8]) + k2 *= murmurC2_128 + k2 = (k2 << 33) | (k2 >> 31) + k2 *= murmurC1_128 + h2 ^= k2 + fallthrough + case 8: + k1 ^= uint64(tail[7]) << 56 + fallthrough + case 7: + k1 ^= uint64(tail[6]) << 48 + fallthrough + case 6: + k1 ^= uint64(tail[5]) << 40 + fallthrough + case 5: + k1 ^= uint64(tail[4]) << 32 + fallthrough + case 4: + k1 ^= uint64(tail[3]) << 24 + fallthrough + case 3: + k1 ^= uint64(tail[2]) << 16 + fallthrough + case 2: + k1 ^= uint64(tail[1]) << 8 + fallthrough + case 1: + k1 ^= uint64(tail[0]) + k1 *= murmurC1_128 + k1 = (k1 << 31) | (k1 >> 33) + k1 *= 
murmurC2_128 + h1 ^= k1 + } + + h1 ^= uint64(len(data)) + h2 ^= uint64(len(data)) + + h1 += h2 + h2 += h1 + + h1 = murmurFmix64(h1) + h2 = murmurFmix64(h2) + + h1 += h2 + h2 += h1 + + return h1, h2 +} + +func murmurFmix64(k uint64) uint64 { + k ^= k >> 33 + k *= 0xff51afd7ed558ccd + k ^= k >> 33 + k *= 0xc4ceb9fe1a85ec53 + k ^= k >> 33 + return k +} diff --git a/go/fory/struct.go b/go/fory/struct.go index bb034fad89..d407e46249 100644 --- a/go/fory/struct.go +++ b/go/fory/struct.go @@ -27,8 +27,6 @@ import ( "unicode" "unicode/utf8" "unsafe" - - "github.com/spaolacci/murmur3" ) // GetStructHash returns the struct hash for a given type using the provided TypeResolver. @@ -970,7 +968,7 @@ func (s *structSerializer) computeHash() int32 { hashString := ComputeStructFingerprint(fields) data := []byte(hashString) - h1, _ := murmur3.Sum128WithSeed(data, 47) + h1, _ := Murmur3Sum128WithSeed(data, 47) hash := int32(h1 & 0xFFFFFFFF) if DebugOutputEnabled() { diff --git a/go/fory/tests/xlang/xlang_test_main.go b/go/fory/tests/xlang/xlang_test_main.go index 47907411c8..fa73f5bac6 100644 --- a/go/fory/tests/xlang/xlang_test_main.go +++ b/go/fory/tests/xlang/xlang_test_main.go @@ -25,7 +25,6 @@ import ( "runtime" "github.com/apache/fory/go/fory" - "github.com/spaolacci/murmur3" ) // ============================================================================ @@ -183,13 +182,6 @@ func assertEqualFloat64(expected, actual float64, name string) { } } -func murmurHash3_x64_128(data []byte, seed int64) (uint64, uint64) { - h := murmur3.New128WithSeed(uint32(seed)) - h.Write(data) - h1, h2 := h.Sum128() - return h1, h2 -} - // ============================================================================ // Test Data Structures // ============================================================================ @@ -559,8 +551,8 @@ func testMurmurHash3() { _ = buf.ReadInt64(&bufErr) _ = buf.ReadInt64(&bufErr) - h1_1, h1_2 := murmurHash3_x64_128([]byte{1, 2, 8}, 47) - h2_1, h2_2 := 
murmurHash3_x64_128([]byte("01234567890123456789"), 47) + h1_1, h1_2 := fory.MurmurHash3_x64_128([]byte{1, 2, 8}, 47) + h2_1, h2_2 := fory.MurmurHash3_x64_128([]byte("01234567890123456789"), 47) outBuf := fory.NewByteBuffer(make([]byte, 0, 32)) outBuf.WriteInt64(int64(h1_1)) @@ -575,7 +567,7 @@ func testMurmurHash3() { h2 := buf.ReadInt64(&bufErr) // Compute expected values - expected1, expected2 := murmurHash3_x64_128([]byte{1, 2, 8}, 47) + expected1, expected2 := fory.MurmurHash3_x64_128([]byte{1, 2, 8}, 47) if h1 != int64(expected1) || h2 != int64(expected2) { panic(fmt.Sprintf("MurmurHash3 mismatch: got (%d, %d), expected (%d, %d)", diff --git a/go/fory/type_def.go b/go/fory/type_def.go index 363f46091d..292f4021a4 100644 --- a/go/fory/type_def.go +++ b/go/fory/type_def.go @@ -24,7 +24,6 @@ import ( "reflect" "github.com/apache/fory/go/fory/meta" - "github.com/spaolacci/murmur3" ) const ( @@ -1233,7 +1232,7 @@ func prependGlobalHeader(buffer *ByteBuffer, isCompressed bool, hasFieldsMeta bo var header uint64 metaSize := buffer.WriterIndex() - hashValue := murmur3.Sum64WithSeed(buffer.GetByteSlice(0, metaSize), 47) + hashValue := Murmur3Sum64WithSeed(buffer.GetByteSlice(0, metaSize), 47) header |= hashValue << (64 - NUM_HASH_BITS) if hasFieldsMeta {