From 39fa2bb03a4a77004131618b9fa4aec900855b3d Mon Sep 17 00:00:00 2001 From: Yael Dekel Date: Fri, 22 May 2020 11:40:46 +0300 Subject: [PATCH 1/3] Fix old hashing for 8 byte types, and key onnx conversion. --- src/Microsoft.ML.Data/Transforms/Hashing.cs | 41 +++++++++++-------- test/Microsoft.ML.Tests/OnnxConversionTest.cs | 40 +++++++++--------- 2 files changed, 44 insertions(+), 37 deletions(-) diff --git a/src/Microsoft.ML.Data/Transforms/Hashing.cs b/src/Microsoft.ML.Data/Transforms/Hashing.cs index 59c4db4881..0b8f914160 100644 --- a/src/Microsoft.ML.Data/Transforms/Hashing.cs +++ b/src/Microsoft.ML.Data/Transforms/Hashing.cs @@ -563,13 +563,12 @@ public uint HashCore(uint seed, uint mask, in VBuffer values) private readonly struct HashDouble : IHasher { [MethodImpl(MethodImplOptions.AggressiveInlining)] - public uint HashCoreOld(uint seed, uint mask, in double value) { if (double.IsNaN(value)) return 0; - return (Hashing.MixHash(HashRound(seed, value)) & mask) + 1; + return (Hashing.MixHash(HashRound(seed, value, true)) & mask) + 1; } [MethodImpl(MethodImplOptions.AggressiveInlining)] @@ -578,7 +577,7 @@ public uint HashCore(uint seed, uint mask, in double value) if (double.IsNaN(value)) return 0; - return (Hashing.MixHash(HashRound(seed, value), sizeof(double)) & mask) + 1; + return (Hashing.MixHash(HashRound(seed, value, false), sizeof(double)) & mask) + 1; } [MethodImpl(MethodImplOptions.AggressiveInlining)] @@ -589,17 +588,19 @@ public uint HashCore(uint seed, uint mask, in VBuffer values) { if (double.IsNaN(value)) return 0; - hash = HashRound(hash, value); + hash = HashRound(hash, value, false); } return (Hashing.MixHash(hash, sizeof(uint)) & mask) + 1; } [MethodImpl(MethodImplOptions.AggressiveInlining)] - private uint HashRound(uint seed, double value) + private uint HashRound(uint seed, double value, bool old) { ulong v = FloatUtils.GetBits(value == 0 ? 0 : value); var hash = Hashing.MurmurRound(seed, Utils.GetLo(v)); var hi = Utils.GetHi(v); + if (old && hi == 0) + return hash; return Hashing.MurmurRound(hash, hi); } } @@ -707,7 +708,7 @@ public uint HashCoreOld(uint seed, uint mask, in ulong value) { if (value == 0) return 0; - return (Hashing.MixHash(HashRound(seed, value)) & mask) + 1; + return (Hashing.MixHash(HashRound(seed, value, true)) & mask) + 1; } [MethodImpl(MethodImplOptions.AggressiveInlining)] @@ -715,7 +716,7 @@ public uint HashCore(uint seed, uint mask, in ulong value) { if (value == 0) return 0; - return (Hashing.MixHash(HashRound(seed, value), sizeof(uint)) & mask) + 1; + return (Hashing.MixHash(HashRound(seed, value, false), sizeof(uint)) & mask) + 1; } [MethodImpl(MethodImplOptions.AggressiveInlining)] @@ -726,17 +727,17 @@ public uint HashCore(uint seed, uint mask, in VBuffer values) { if (value == 0) return 0; - hash = HashRound(hash, value); + hash = HashRound(hash, value, false); } return (Hashing.MixHash(hash, sizeof(uint)) & mask) + 1; } [MethodImpl(MethodImplOptions.AggressiveInlining)] - private uint HashRound(uint seed, ulong value) + private uint HashRound(uint seed, ulong value, bool old) { var hash = Hashing.MurmurRound(seed, Utils.GetLo(value)); var hi = Utils.GetHi(value); - if (hi == 0) + if (old && hi == 0) return hash; return Hashing.MurmurRound(hash, hi); } @@ -807,13 +808,13 @@ public uint HashCore(uint seed, uint mask, in VBuffer values) [MethodImpl(MethodImplOptions.AggressiveInlining)] public uint HashCoreOld(uint seed, uint mask, in ulong value) { - return (Hashing.MixHash(HashRound(seed, value)) & mask) + 1; + return (Hashing.MixHash(HashRound(seed, value, true)) & mask) + 1; } [MethodImpl(MethodImplOptions.AggressiveInlining)] public uint HashCore(uint seed, uint mask, in ulong value) { - return (Hashing.MixHash(HashRound(seed, value), sizeof(ulong)) & mask) + 1; + return (Hashing.MixHash(HashRound(seed, value, false), sizeof(ulong)) & mask) + 1; } [MethodImpl(MethodImplOptions.AggressiveInlining)] @@ -821,15 +822,17 @@ public uint HashCore(uint seed, uint mask, in VBuffer values) { var hash = seed; foreach (var value in values.DenseValues()) - hash = HashRound(hash, value); + hash = HashRound(hash, value, false); return (Hashing.MixHash(hash, sizeof(uint)) & mask) + 1; } [MethodImpl(MethodImplOptions.AggressiveInlining)] - private uint HashRound(uint seed, ulong value) + private uint HashRound(uint seed, ulong value, bool old) { var hash = Hashing.MurmurRound(seed, Utils.GetLo(value)); var hi = Utils.GetHi(value); + if (old && hi == 0) + return hash; return Hashing.MurmurRound(hash, hi); } } @@ -960,13 +963,13 @@ public uint HashCore(uint seed, uint mask, in VBuffer values) [MethodImpl(MethodImplOptions.AggressiveInlining)] public uint HashCoreOld(uint seed, uint mask, in long value) { - return (Hashing.MixHash(HashRound(seed, value)) & mask) + 1; + return (Hashing.MixHash(HashRound(seed, value, true)) & mask) + 1; } [MethodImpl(MethodImplOptions.AggressiveInlining)] public uint HashCore(uint seed, uint mask, in long value) { - return (Hashing.MixHash(HashRound(seed, value), sizeof(long)) & mask) + 1; + return (Hashing.MixHash(HashRound(seed, value, false), sizeof(long)) & mask) + 1; } [MethodImpl(MethodImplOptions.AggressiveInlining)] @@ -974,15 +977,17 @@ public uint HashCore(uint seed, uint mask, in VBuffer values) { var hash = seed; foreach (var value in values.DenseValues()) - hash = HashRound(hash, value); + hash = HashRound(hash, value, false); return (Hashing.MixHash(hash, sizeof(uint)) & mask) + 1; } [MethodImpl(MethodImplOptions.AggressiveInlining)] - private uint HashRound(uint seed, long value) + private uint HashRound(uint seed, long value, bool old) { var hash = Hashing.MurmurRound(seed, Utils.GetLo((ulong)value)); var hi = Utils.GetHi((ulong)value); + if (old && hi == 0) + return hash; return Hashing.MurmurRound(hash, hi); } } diff --git a/test/Microsoft.ML.Tests/OnnxConversionTest.cs b/test/Microsoft.ML.Tests/OnnxConversionTest.cs index cfabd9d1a0..6b5d032dcb 100644 --- a/test/Microsoft.ML.Tests/OnnxConversionTest.cs +++ b/test/Microsoft.ML.Tests/OnnxConversionTest.cs @@ -6,7 +6,6 @@ using System.Collections.Generic; using System.IO; using System.Linq; -using System.Runtime.InteropServices; using System.Text.RegularExpressions; using Google.Protobuf; using Microsoft.ML.Data; @@ -1200,29 +1199,32 @@ private class HashData public uint Value { get; set; } } - [Fact] - public void MurmurHashKeyTest() + [Theory] + [CombinatorialData] + public void MurmurHashKeyTest( + [CombinatorialValues(/*DataKind.Byte, DataKind.UInt16, */DataKind.UInt32/*, DataKind.UInt64*/)]DataKind keyType) { - var mlContext = new MLContext(); + var dataFile = DeleteOutputPath("KeysToOnnx.txt"); + File.WriteAllLines(dataFile, + new[] + { + "2", + "5", + "19" + }); - var samples = new[] + var data = ML.Data.LoadFromTextFile(dataFile, new[] { - new HashData {Value = 232}, - new HashData {Value = 42}, - new HashData {Value = 0}, - }; - - IDataView data = mlContext.Data.LoadFromEnumerable(samples); + new TextLoader.Column("Value", keyType, new[] + { + new TextLoader.Range(0) + }, new KeyCount(10)) + }); - var hashEstimator = mlContext.Transforms.Conversion.MapValueToKey("Value").Append(mlContext.Transforms.Conversion.Hash(new[] - { - new HashingEstimator.ColumnOptions( - "ValueHashed", - "Value") - })); + var hashEstimator = ML.Transforms.Conversion.Hash("ValueHashed", "Value"); var model = hashEstimator.Fit(data); var transformedData = model.Transform(data); - var onnxModel = mlContext.Model.ConvertToOnnxProtobuf(model, data); + var onnxModel = ML.Model.ConvertToOnnxProtobuf(model, data); var onnxFileName = "MurmurHashV2.onnx"; var onnxTextName = "MurmurHashV2.txt"; @@ -1236,7 +1238,7 @@ public void MurmurHashKeyTest() // Evaluate the saved ONNX model using the data used to train the ML.NET pipeline. string[] inputNames = onnxModel.Graph.Input.Select(valueInfoProto => valueInfoProto.Name).ToArray(); string[] outputNames = onnxModel.Graph.Output.Select(valueInfoProto => valueInfoProto.Name).ToArray(); - var onnxEstimator = mlContext.Transforms.ApplyOnnxModel(outputNames, inputNames, onnxModelPath); + var onnxEstimator = ML.Transforms.ApplyOnnxModel(outputNames, inputNames, onnxModelPath); var onnxTransformer = onnxEstimator.Fit(data); var onnxResult = onnxTransformer.Transform(data); CompareSelectedColumns("ValueHashed", "ValueHashed", transformedData, onnxResult); From 652fbe9c9b761f1e909afb01a039d4d0ead732aa Mon Sep 17 00:00:00 2001 From: Yael Dekel Date: Sun, 24 May 2020 10:48:30 +0300 Subject: [PATCH 2/3] Fix more hashing and unit tests --- src/Microsoft.ML.Data/Transforms/Hashing.cs | 22 +++++++------- .../Common/SavePipe/SavePipeHash-Data.txt | 8 ++--- .../Transformers/HashTests.cs | 30 ++++++++++++------- 3 files changed, 34 insertions(+), 26 deletions(-) diff --git a/src/Microsoft.ML.Data/Transforms/Hashing.cs b/src/Microsoft.ML.Data/Transforms/Hashing.cs index 0b8f914160..5d2b80ed48 100644 --- a/src/Microsoft.ML.Data/Transforms/Hashing.cs +++ b/src/Microsoft.ML.Data/Transforms/Hashing.cs @@ -590,7 +590,7 @@ public uint HashCore(uint seed, uint mask, in VBuffer values) return 0; hash = HashRound(hash, value, false); } - return (Hashing.MixHash(hash, sizeof(uint)) & mask) + 1; + return (Hashing.MixHash(hash, sizeof(double)) & mask) + 1; } [MethodImpl(MethodImplOptions.AggressiveInlining)] @@ -716,7 +716,7 @@ public uint HashCore(uint seed, uint mask, in ulong value) { if (value == 0) return 0; - return (Hashing.MixHash(HashRound(seed, value, false), sizeof(uint)) & mask) + 1; + return (Hashing.MixHash(HashRound(seed, value, false), sizeof(ulong)) & mask) + 1; } [MethodImpl(MethodImplOptions.AggressiveInlining)] @@ -729,7 +729,7 @@ public uint HashCore(uint seed, uint mask, in VBuffer values) return 0; hash = HashRound(hash, value, false); } - return (Hashing.MixHash(hash, sizeof(uint)) & mask) + 1; + return (Hashing.MixHash(hash, sizeof(ulong)) & mask) + 1; } [MethodImpl(MethodImplOptions.AggressiveInlining)] @@ -823,7 +823,7 @@ public uint HashCore(uint seed, uint mask, in VBuffer values) var hash = seed; foreach (var value in values.DenseValues()) hash = HashRound(hash, value, false); - return (Hashing.MixHash(hash, sizeof(uint)) & mask) + 1; + return (Hashing.MixHash(hash, sizeof(ulong)) & mask) + 1; } [MethodImpl(MethodImplOptions.AggressiveInlining)] @@ -842,13 +842,13 @@ private uint HashRound(uint seed, ulong value, bool old) [MethodImpl(MethodImplOptions.AggressiveInlining)] public uint HashCoreOld(uint seed, uint mask, in DataViewRowId value) { - return (Hashing.MixHash(HashRound(seed, value)) & mask) + 1; + return (Hashing.MixHash(HashRound(seed, value, true)) & mask) + 1; } [MethodImpl(MethodImplOptions.AggressiveInlining)] public uint HashCore(uint seed, uint mask, in DataViewRowId value) { - return (Hashing.MixHash(HashRound(seed, value), sizeof(uint)) & mask) + 1; + return (Hashing.MixHash(HashRound(seed, value, false), 2 * sizeof(ulong)) & mask) + 1; } [MethodImpl(MethodImplOptions.AggressiveInlining)] @@ -856,18 +856,18 @@ public uint HashCore(uint seed, uint mask, in VBuffer values) { var hash = seed; foreach (var value in values.DenseValues()) - hash = HashRound(hash, value); + hash = HashRound(hash, value, false); return (Hashing.MixHash(hash, sizeof(uint)) & mask) + 1; } [MethodImpl(MethodImplOptions.AggressiveInlining)] - private uint HashRound(uint seed, DataViewRowId value) + private uint HashRound(uint seed, DataViewRowId value, bool old) { var hash = Hashing.MurmurRound(seed, Utils.GetLo(value.Low)); var hi = Utils.GetHi(value.Low); - if (hi != 0) + if (old && hi != 0) hash = Hashing.MurmurRound(hash, hi); - if (value.High != 0) + if (old && value.High != 0) { hash = Hashing.MurmurRound(hash, Utils.GetLo(value.High)); hi = Utils.GetHi(value.High); @@ -978,7 +978,7 @@ public uint HashCore(uint seed, uint mask, in VBuffer values) var hash = seed; foreach (var value in values.DenseValues()) hash = HashRound(hash, value, false); - return (Hashing.MixHash(hash, sizeof(uint)) & mask) + 1; + return (Hashing.MixHash(hash, sizeof(long)) & mask) + 1; } [MethodImpl(MethodImplOptions.AggressiveInlining)] diff --git a/test/BaselineOutput/Common/SavePipe/SavePipeHash-Data.txt b/test/BaselineOutput/Common/SavePipe/SavePipeHash-Data.txt index 6ffa64563f..714394bf7b 100644 --- a/test/BaselineOutput/Common/SavePipe/SavePipeHash-Data.txt +++ b/test/BaselineOutput/Common/SavePipe/SavePipeHash-Data.txt @@ -18,7 +18,7 @@ #@ col=VarComb:U4[128]:32-** #@ } SingleHash 31 27:Hash9 28:Hash10 29:Hash11 30:Hash12 -14 14 14 14 6 0 13 24 47 44 32 16 40 22 24 32 16 40 22 56 59 22 56 59 53 22 56 22 120 0 3 112 33 31 117 22 120 51 31 39 51 31 39 51 31 39 51 31 39 -0 0 1 4 0 13 0 32 16 20 52 31 44 24 24 52 31 44 56 59 56 56 59 56 22 56 22 24 123 3 0 112 50 41 36 117 123 75 127 51 47 109 108 51 47 109 108 51 47 109 108 51 47 109 108 -14 14 11 4 6 6 0 24 60 20 32 47 44 22 22 24 32 47 44 22 22 56 22 22 56 53 53 22 22 150 6 0 38 22 68 68 68 68 -74 3:10 6:6 9:10 12:20 15:22 18:20 21:22 24:22 27:53 31:6 36:35 38:47 43:51 45:22 50:66 52:96 57:66 59:96 64:66 66:96 71:66 73:96 +14 14 14 14 6 0 13 24 47 44 8 31 17 22 24 32 16 40 22 56 59 35 23 23 53 22 56 22 120 0 7 112 33 31 117 22 120 51 31 39 51 31 39 51 31 39 17 51 35 +0 0 1 4 0 13 0 32 16 20 49 51 54 24 24 52 31 44 56 59 56 23 23 23 22 56 22 24 123 3 7 112 50 41 36 117 123 75 127 51 47 109 108 51 47 109 108 51 47 109 108 17 91 57 49 +14 14 11 4 6 6 0 24 60 20 8 6 54 22 22 24 32 47 44 22 22 56 35 35 23 53 53 22 22 150 6 7 38 22 68 68 68 5 +74 3:10 6:6 9:10 12:40 15:22 18:20 21:22 24:35 27:53 31:3 36:35 38:47 43:51 45:22 50:66 52:96 57:66 59:96 64:66 66:96 71:2 73:55 diff --git a/test/Microsoft.ML.Tests/Transformers/HashTests.cs b/test/Microsoft.ML.Tests/Transformers/HashTests.cs index 16e885f4bd..0109d54161 100644 --- a/test/Microsoft.ML.Tests/Transformers/HashTests.cs +++ b/test/Microsoft.ML.Tests/Transformers/HashTests.cs @@ -220,7 +220,6 @@ ValueGetter hashGetter(HashingEstimator.ColumnOptions colInfo) } private void HashTestPositiveIntegerCore32Bits(ulong value, uint expected, uint expectedOrdered, uint expectedOrdered3, uint expectedCombined, uint expectedCombinedSparse) - { uint eKey = value == 0 ? 0 : expected; uint eoKey = value == 0 ? 0 : expectedOrdered; @@ -243,9 +242,6 @@ private void HashTestPositiveIntegerCore32Bits(ulong value, uint expected, uint HashTestCore((uint)value, new KeyDataViewType(typeof(uint), int.MaxValue - 1), eKey, eoKey, e3Key, ecKey, 0); } - HashTestCore(new DataViewRowId(value, 0), RowIdDataViewType.Instance, expected, expectedOrdered, expectedOrdered3, expectedCombined, expectedCombinedSparse); - HashTestCore((ulong)value, new KeyDataViewType(typeof(ulong), int.MaxValue - 1), eKey, eoKey, e3Key, ecKey, 0); - // Next let's check signed numbers. if (value <= (ulong)sbyte.MaxValue) HashTestCore((sbyte)value, NumberDataViewType.SByte, expected, expectedOrdered, expectedOrdered3, expectedCombined, expectedCombinedSparse); @@ -256,7 +252,6 @@ private void HashTestPositiveIntegerCore32Bits(ulong value, uint expected, uint } private void HashTestPositiveIntegerCore64Bits(ulong value, uint expected, uint expectedOrdered, uint expectedOrdered3, uint expectedCombined, uint expectedCombinedSparse) - { uint eKey = value == 0 ? 0 : expected; uint eoKey = value == 0 ? 0 : expectedOrdered; @@ -268,6 +263,14 @@ private void HashTestPositiveIntegerCore64Bits(ulong value, uint expected, uint // Next let's check signed numbers. if (value <= long.MaxValue) HashTestCore((long)value, NumberDataViewType.Int64, expected, expectedOrdered, expectedOrdered3, expectedCombined, expectedCombinedSparse); + + // ulong keys + HashTestCore(value, new KeyDataViewType(typeof(ulong), int.MaxValue - 1), eKey, eoKey, e3Key, ecKey, 0); + } + + private void HashTestPositiveIntegerCore128Bits(ulong value, uint expected, uint expectedOrdered, uint expectedOrdered3, uint expectedCombined, uint expectedCombinedSparse) + { + HashTestCore(new DataViewRowId(value, 0), RowIdDataViewType.Instance, expected, expectedOrdered, expectedOrdered3, expectedCombined, expectedCombinedSparse); } [Fact] @@ -277,9 +280,12 @@ public void TestHashIntegerNumbers() HashTestPositiveIntegerCore32Bits(1, 502, 537, 746, 588, 286); HashTestPositiveIntegerCore32Bits(2, 407, 801, 652, 696, 172); - HashTestPositiveIntegerCore64Bits(0, 512, 851, 795, 1010, 620); - HashTestPositiveIntegerCore64Bits(1, 329, 190, 574, 491, 805); - HashTestPositiveIntegerCore64Bits(2, 484, 713, 128, 606, 326); + HashTestPositiveIntegerCore64Bits(0, 512, 851, 795, 904, 84); + HashTestPositiveIntegerCore64Bits(1, 329, 190, 574, 894, 602); + HashTestPositiveIntegerCore64Bits(2, 484, 713, 128, 990, 413); + + HashTestPositiveIntegerCore128Bits(0, 362, 161, 115, 882, 1010); + HashTestPositiveIntegerCore128Bits(1294, 712, 920, 291, 945, 331); } [Fact] @@ -295,10 +301,12 @@ public void TestHashFloatingPointNumbers() HashTestCore(1f, NumberDataViewType.Single, 463, 855, 732, 75, 487); HashTestCore(-1f, NumberDataViewType.Single, 252, 612, 780, 179, 80); HashTestCore(0f, NumberDataViewType.Single, 842, 358, 20, 882, 1010); + HashTestCore(float.NaN, NumberDataViewType.Single, 0, 0, 0, 0, 0); - HashTestCore(1d, NumberDataViewType.Double, 188, 57, 690, 727, 36); - HashTestCore(-1d, NumberDataViewType.Double, 885, 804, 22, 582, 346); - HashTestCore(0d, NumberDataViewType.Double, 512, 851, 795, 1010, 620); + HashTestCore(1d, NumberDataViewType.Double, 188, 57, 690, 151, 779); + HashTestCore(-1d, NumberDataViewType.Double, 885, 804, 22, 716, 428); + HashTestCore(0d, NumberDataViewType.Double, 512, 851, 795, 904, 84); + HashTestCore(double.NaN, NumberDataViewType.Double, 0, 0, 0, 0, 0); } [Fact] From 07a176032c5b79f9ae1a8cf2b0d2e8416862a860 Mon Sep 17 00:00:00 2001 From: Yael Dekel Date: Sun, 24 May 2020 11:06:42 +0300 Subject: [PATCH 3/3] hash length of vector when hashing into a single value --- src/Microsoft.ML.Data/Transforms/Hashing.cs | 32 +++++++++---------- .../Transformers/HashTests.cs | 32 +++++++++---------- 2 files changed, 32 insertions(+), 32 deletions(-) diff --git a/src/Microsoft.ML.Data/Transforms/Hashing.cs b/src/Microsoft.ML.Data/Transforms/Hashing.cs index 5d2b80ed48..6bd61f5d82 100644 --- a/src/Microsoft.ML.Data/Transforms/Hashing.cs +++ b/src/Microsoft.ML.Data/Transforms/Hashing.cs @@ -556,7 +556,7 @@ public uint HashCore(uint seed, uint mask, in VBuffer values) return 0; hash = Hashing.MurmurRound(hash, FloatUtils.GetBits(value == 0 ? 0 : value)); } - return (Hashing.MixHash(hash, sizeof(uint)) & mask) + 1; + return (Hashing.MixHash(hash, values.Length * sizeof(uint)) & mask) + 1; } } @@ -590,7 +590,7 @@ public uint HashCore(uint seed, uint mask, in VBuffer values) return 0; hash = HashRound(hash, value, false); } - return (Hashing.MixHash(hash, sizeof(double)) & mask) + 1; + return (Hashing.MixHash(hash, values.Length * sizeof(double)) & mask) + 1; } [MethodImpl(MethodImplOptions.AggressiveInlining)] @@ -649,7 +649,7 @@ public uint HashCore(uint seed, uint mask, in VBuffer values) return 0; hash = Hashing.MurmurRound(hash, value); } - return (Hashing.MixHash(hash, sizeof(uint)) & mask) + 1; + return (Hashing.MixHash(hash, values.Length * sizeof(uint)) & mask) + 1; } } @@ -673,7 +673,7 @@ public uint HashCore(uint seed, uint mask, in VBuffer values) return 0; hash = Hashing.MurmurRound(hash, value); } - return (Hashing.MixHash(hash, sizeof(uint)) & mask) + 1; + return (Hashing.MixHash(hash, values.Length * sizeof(uint)) & mask) + 1; } } @@ -697,7 +697,7 @@ public uint HashCore(uint seed, uint mask, in VBuffer values) return 0; hash = Hashing.MurmurRound(hash, value); } - return (Hashing.MixHash(hash, sizeof(uint)) & mask) + 1; + return (Hashing.MixHash(hash, values.Length * sizeof(uint)) & mask) + 1; } } @@ -729,7 +729,7 @@ public uint HashCore(uint seed, uint mask, in VBuffer values) return 0; hash = HashRound(hash, value, false); } - return (Hashing.MixHash(hash, sizeof(ulong)) & mask) + 1; + return (Hashing.MixHash(hash, values.Length * sizeof(ulong)) & mask) + 1; } [MethodImpl(MethodImplOptions.AggressiveInlining)] @@ -759,7 +759,7 @@ public uint HashCore(uint seed, uint mask, in VBuffer values) var hash = seed; foreach (var value in values.DenseValues()) hash = Hashing.MurmurRound(hash, value); - return (Hashing.MixHash(hash, sizeof(uint)) & mask) + 1; + return (Hashing.MixHash(hash, values.Length * sizeof(uint)) & mask) + 1; } } @@ -779,7 +779,7 @@ public uint HashCore(uint seed, uint mask, in VBuffer values) var hash = seed; foreach (var value in values.DenseValues()) hash = Hashing.MurmurRound(hash, value); - return (Hashing.MixHash(hash, sizeof(uint)) & mask) + 1; + return (Hashing.MixHash(hash, values.Length * sizeof(uint)) & mask) + 1; } } @@ -799,7 +799,7 @@ public uint HashCore(uint seed, uint mask, in VBuffer values) var hash = seed; foreach (var value in values.DenseValues()) hash = Hashing.MurmurRound(hash, value); - return (Hashing.MixHash(hash, sizeof(uint)) & mask) + 1; + return (Hashing.MixHash(hash, values.Length * sizeof(uint)) & mask) + 1; } } @@ -823,7 +823,7 @@ public uint HashCore(uint seed, uint mask, in VBuffer values) var hash = seed; foreach (var value in values.DenseValues()) hash = HashRound(hash, value, false); - return (Hashing.MixHash(hash, sizeof(ulong)) & mask) + 1; + return (Hashing.MixHash(hash, values.Length * sizeof(ulong)) & mask) + 1; } [MethodImpl(MethodImplOptions.AggressiveInlining)] @@ -857,7 +857,7 @@ public uint HashCore(uint seed, uint mask, in VBuffer values) var hash = seed; foreach (var value in values.DenseValues()) hash = HashRound(hash, value, false); - return (Hashing.MixHash(hash, sizeof(uint)) & mask) + 1; + return (Hashing.MixHash(hash, values.Length * sizeof(uint)) & mask) + 1; } [MethodImpl(MethodImplOptions.AggressiveInlining)] @@ -894,7 +894,7 @@ public uint HashCore(uint seed, uint mask, in VBuffer values) var hash = seed; foreach (var value in values.DenseValues()) hash = Hashing.MurmurRound(hash, value ? 1u : 0u); - return (Hashing.MixHash(hash, sizeof(uint)) & mask) + 1; + return (Hashing.MixHash(hash, values.Length * sizeof(uint)) & mask) + 1; } } @@ -914,7 +914,7 @@ public uint HashCore(uint seed, uint mask, in VBuffer values) var hash = seed; foreach (var value in values.DenseValues()) hash = Hashing.MurmurRound(hash, (uint)value); - return (Hashing.MixHash(hash, sizeof(uint)) & mask) + 1; + return (Hashing.MixHash(hash, values.Length * sizeof(uint)) & mask) + 1; } } @@ -934,7 +934,7 @@ public uint HashCore(uint seed, uint mask, in VBuffer values) var hash = seed; foreach (var value in values.DenseValues()) hash = Hashing.MurmurRound(hash, (uint)value); - return (Hashing.MixHash(hash, sizeof(uint)) & mask) + 1; + return (Hashing.MixHash(hash, values.Length * sizeof(uint)) & mask) + 1; } } @@ -954,7 +954,7 @@ public uint HashCore(uint seed, uint mask, in VBuffer values) var hash = seed; foreach (var value in values.DenseValues()) hash = Hashing.MurmurRound(hash, (uint)value); - return (Hashing.MixHash(hash, sizeof(uint)) & mask) + 1; + return (Hashing.MixHash(hash, values.Length * sizeof(uint)) & mask) + 1; } } @@ -978,7 +978,7 @@ public uint HashCore(uint seed, uint mask, in VBuffer values) var hash = seed; foreach (var value in values.DenseValues()) hash = HashRound(hash, value, false); - return (Hashing.MixHash(hash, sizeof(long)) & mask) + 1; + return (Hashing.MixHash(hash, values.Length * sizeof(long)) & mask) + 1; } [MethodImpl(MethodImplOptions.AggressiveInlining)] diff --git a/test/Microsoft.ML.Tests/Transformers/HashTests.cs b/test/Microsoft.ML.Tests/Transformers/HashTests.cs index 0109d54161..63478e0502 100644 --- a/test/Microsoft.ML.Tests/Transformers/HashTests.cs +++ b/test/Microsoft.ML.Tests/Transformers/HashTests.cs @@ -276,16 +276,16 @@ private void HashTestPositiveIntegerCore128Bits(ulong value, uint expected, uint [Fact] public void TestHashIntegerNumbers() { - HashTestPositiveIntegerCore32Bits(0, 842, 358, 20, 882, 1010); - HashTestPositiveIntegerCore32Bits(1, 502, 537, 746, 588, 286); - HashTestPositiveIntegerCore32Bits(2, 407, 801, 652, 696, 172); + HashTestPositiveIntegerCore32Bits(0, 842, 358, 20, 429, 333); + HashTestPositiveIntegerCore32Bits(1, 502, 537, 746, 847, 711); + HashTestPositiveIntegerCore32Bits(2, 407, 801, 652, 727, 462); - HashTestPositiveIntegerCore64Bits(0, 512, 851, 795, 904, 84); - HashTestPositiveIntegerCore64Bits(1, 329, 190, 574, 894, 602); - HashTestPositiveIntegerCore64Bits(2, 484, 713, 128, 990, 413); + HashTestPositiveIntegerCore64Bits(0, 512, 851, 795, 333, 113); + HashTestPositiveIntegerCore64Bits(1, 329, 190, 574, 880, 471); + HashTestPositiveIntegerCore64Bits(2, 484, 713, 128, 95, 9); - HashTestPositiveIntegerCore128Bits(0, 362, 161, 115, 882, 1010); - HashTestPositiveIntegerCore128Bits(1294, 712, 920, 291, 945, 331); + HashTestPositiveIntegerCore128Bits(0, 362, 161, 115, 429, 333); + HashTestPositiveIntegerCore128Bits(1294, 712, 920, 291, 859, 353); } [Fact] @@ -298,14 +298,14 @@ public void TestHashString() [Fact] public void TestHashFloatingPointNumbers() { - HashTestCore(1f, NumberDataViewType.Single, 463, 855, 732, 75, 487); - HashTestCore(-1f, NumberDataViewType.Single, 252, 612, 780, 179, 80); - HashTestCore(0f, NumberDataViewType.Single, 842, 358, 20, 882, 1010); + HashTestCore(1f, NumberDataViewType.Single, 463, 855, 732, 56, 557); + HashTestCore(-1f, NumberDataViewType.Single, 252, 612, 780, 116, 515); + HashTestCore(0f, NumberDataViewType.Single, 842, 358, 20, 429, 333); HashTestCore(float.NaN, NumberDataViewType.Single, 0, 0, 0, 0, 0); - HashTestCore(1d, NumberDataViewType.Double, 188, 57, 690, 151, 779); - HashTestCore(-1d, NumberDataViewType.Double, 885, 804, 22, 716, 428); - HashTestCore(0d, NumberDataViewType.Double, 512, 851, 795, 904, 84); + HashTestCore(1d, NumberDataViewType.Double, 188, 57, 690, 655, 896); + HashTestCore(-1d, NumberDataViewType.Double, 885, 804, 22, 461, 309); + HashTestCore(0d, NumberDataViewType.Double, 512, 851, 795, 333, 113); HashTestCore(double.NaN, NumberDataViewType.Double, 0, 0, 0, 0, 0); } @@ -313,8 +313,8 @@ public void TestHashFloatingPointNumbers() public void TestHashBool() { // These are the same for the hashes of 0 and 1. - HashTestCore(false, BooleanDataViewType.Instance, 842, 358, 20, 882, 1010); - HashTestCore(true, BooleanDataViewType.Instance, 502, 537, 746, 588, 286); + HashTestCore(false, BooleanDataViewType.Instance, 842, 358, 20, 429, 333); + HashTestCore(true, BooleanDataViewType.Instance, 502, 537, 746, 847, 711); } private class HashData