Skip to content

Commit

Permalink
Performance improvements for BytesRefHash (opensearch-project#8788)
Browse files Browse the repository at this point in the history
* Performance improvements for BytesRefHash

Signed-off-by: Ketan Verma <ketan9495@gmail.com>

* Replace BytesRefHash and clean up alternative implementations

Signed-off-by: Ketan Verma <ketan9495@gmail.com>

* Added t1ha1 to replace xxh3 hash function

Signed-off-by: Ketan Verma <ketan9495@gmail.com>

* Update t1ha1 to use unsignedMultiplyHigh on JDK 18 and above

Signed-off-by: Ketan Verma <ketan9495@gmail.com>

* Add link to the reference implementation for t1ha1

Signed-off-by: Ketan Verma <ketan9495@gmail.com>

* Annotate t1ha1 with @opensearch.internal

Signed-off-by: Ketan Verma <ketan9495@gmail.com>

* Run spotless

Signed-off-by: Ketan Verma <ketan9495@gmail.com>

* Add pre-computed hashes to speed up reinserts

Signed-off-by: Ketan Verma <ketan9495@gmail.com>

* Refactor HashFunctionTestCase

Signed-off-by: Ketan Verma <ketan9495@gmail.com>

---------

Signed-off-by: Ketan Verma <ketan9495@gmail.com>
Signed-off-by: Ivan Brusic <ivan.brusic@flocksafety.com>
  • Loading branch information
ketanv3 authored and brusic committed Sep 25, 2023
1 parent 2a7d1c3 commit 168d3a0
Show file tree
Hide file tree
Showing 17 changed files with 1,547 additions and 144 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Replace the deprecated IndexReader APIs with new storedFields() & termVectors() ([#7792](https://github.com/opensearch-project/OpenSearch/pull/7792))
- [Remote Store] Add support to restore only unassigned shards of an index ([#8792](https://github.com/opensearch-project/OpenSearch/pull/8792))
- Add safeguard limits for file cache during node level allocation ([#8208](https://github.com/opensearch-project/OpenSearch/pull/8208))
- Performance improvements for BytesRefHash ([#8788](https://github.com/opensearch-project/OpenSearch/pull/8788))
- Add support for aggregation profiler with concurrent aggregation ([#8801](https://github.com/opensearch-project/OpenSearch/pull/8801))
- [Remove] Deprecated Fractional ByteSizeValue support #9005 ([#9005](https://github.com/opensearch-project/OpenSearch/pull/9005))
- Add support for aggregation profiler with concurrent aggregation ([#8801](https://github.com/opensearch-project/OpenSearch/pull/8801))
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.common.hash;

import org.apache.lucene.util.StringHelper;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.Warmup;
import org.openjdk.jmh.infra.Blackhole;

import java.util.Random;

/**
 * JMH throughput benchmark that hashes a single byte array with each of the
 * hash functions enumerated in {@link Type}, across a range of input lengths.
 *
 * <p>Every combination of {@link Options#type} and {@link Options#length} is
 * run as its own trial; the input buffer is filled once per trial from a
 * fixed-seed {@link Random}, so all hash functions see identical bytes for a
 * given length.
 */
@Fork(value = 3)
@Warmup(iterations = 1, time = 1)
@Measurement(iterations = 3, time = 3)
@BenchmarkMode(Mode.Throughput)
public class HashFunctionBenchmark {

    /**
     * Hashes the prepared input with the selected function and hands the
     * result to the blackhole so the computation is not optimized away.
     *
     * @param bh   sink for the computed hash value
     * @param opts benchmark state holding the hash function and input bytes
     */
    @Benchmark
    public void hash(Blackhole bh, Options opts) {
        final long hashed = opts.type.hash(opts.data);
        bh.consume(hashed);
    }

    /**
     * Benchmark-scoped state: the hash function under test, the input length,
     * and the input buffer derived from that length.
     */
    @State(Scope.Benchmark)
    public static class Options {
        /** Hash function to benchmark. */
        @Param({ "MURMUR3", "T1HA1" })
        public Type type;

        /** Input size in bytes; values range from 1 byte up to 393812 bytes. */
        @Param({
            "1", "2", "3", "4", "5", "6", "7", "8", "9", "10",
            "12", "14", "16", "18", "21", "24", "28", "32", "36", "41",
            "47", "54", "62", "71", "81", "90", "100", "112", "125", "139",
            "156", "174", "194", "220", "245", "272", "302", "339", "384", "431",
            "488", "547", "608", "675", "763", "863", "967", "1084", "1225", "1372",
            "1537", "1737", "1929", "2142", "2378", "2664", "3011", "3343", "3778", "4232",
            "4783", "5310", "5895", "6662", "7529", "8508", "9444", "10483", "11741", "13150",
            "14597", "16495", "18475", "20877", "23383", "25956", "29071", "32560", "36142", "40841",
            "46151", "52151", "57888", "65414", "72610", "82050", "91076", "102006", "114247", "127957",
            "143312", "159077", "176576", "199531", "223475", "250292", "277825", "313943", "351617", "393812" })
        public Integer length;

        /** Input bytes to hash; populated by {@link #setup()}. */
        public byte[] data;

        /**
         * Allocates a buffer of {@link #length} bytes and fills it with
         * pseudo-random content from a generator seeded with 0, making the
         * input reproducible from run to run.
         */
        @Setup
        public void setup() {
            final byte[] buffer = new byte[length];
            final Random rng = new Random(0);
            rng.nextBytes(buffer);
            data = buffer;
        }
    }

    /**
     * Hash functions that can be selected for the benchmark. Each constant
     * wraps its implementation behind the common {@link Hasher} signature,
     * passing 0 as the trailing argument of the underlying call.
     */
    public enum Type {
        /** Lucene's {@code StringHelper.murmurhash3_x86_32}; its int result is widened to long. */
        MURMUR3((bytes, from, count) -> StringHelper.murmurhash3_x86_32(bytes, from, count, 0)),

        /** {@code T1ha1.hash}, as defined elsewhere in this package. */
        T1HA1((bytes, from, count) -> T1ha1.hash(bytes, from, count, 0));

        private final Hasher hasher;

        Type(Hasher delegate) {
            hasher = delegate;
        }

        /**
         * Hashes the entire array with this function.
         *
         * @param data bytes to hash, from index 0 through {@code data.length}
         * @return the hash value produced by the wrapped function
         */
        public long hash(byte[] data) {
            final int wholeLength = data.length;
            return hasher.hash(data, 0, wholeLength);
        }
    }

    /**
     * Common signature for a hash function applied to a slice of a byte array.
     */
    @FunctionalInterface
    interface Hasher {
        /**
         * @param data   array containing the bytes to hash
         * @param offset index of the first byte to hash
         * @param length number of bytes to hash
         * @return the computed hash value
         */
        long hash(byte[] data, int offset, int length);
    }
}

0 comments on commit 168d3a0

Please sign in to comment.