Replace Map<Long, Object> by primitive LongObjectHashMap. (#13392)

Add LongObjectHashMap and replace Map<Long, Object>. Add LongIntHashMap and replace Map<Long, Integer>. Add HPPC dependency to join and spatial modules for primitive float and double values.
apache · May 23, 2024 · 816db79 · 816db79
1 parent f07038d
commit 816db79
Show file tree

Hide file tree

Showing 26 changed files with 3,280 additions and 265 deletions.
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
@@ -122,6 +122,8 @@ Optimizations
 
 * GITHUB#13368: Replace Map<Integer, Object> by primitive IntObjectHashMap. (Bruno Roustant)
 
+* GITHUB#13392: Replace Map<Long, Object> by primitive LongObjectHashMap. (Bruno Roustant)
+
 Bug Fixes
 ---------------------
 

diff --git a/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/BiSegGraph.java b/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/BiSegGraph.java
@@ -20,6 +20,7 @@
 import java.util.List;
 import org.apache.lucene.analysis.cn.smart.Utility;
 import org.apache.lucene.util.hppc.IntObjectHashMap;
+import org.apache.lucene.util.hppc.ObjectCursor;
 
 /**
  * Graph representing possible token pairs (bigrams) at each start offset in the sentence.
@@ -218,8 +219,7 @@ public List<SegToken> getShortPath() {
   @Override
   public String toString() {
     StringBuilder sb = new StringBuilder();
-    for (IntObjectHashMap.ObjectCursor<ArrayList<SegTokenPair>> segList :
-        tokenPairListTable.values()) {
+    for (ObjectCursor<ArrayList<SegTokenPair>> segList : tokenPairListTable.values()) {
       for (SegTokenPair pair : segList.value) {
         sb.append(pair).append("\n");
       }

diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesConsumer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesConsumer.java
@@ -22,9 +22,7 @@
 
 import java.io.IOException;
 import java.util.Arrays;
-import java.util.HashMap;
 import java.util.HashSet;
-import java.util.Map;
 import java.util.Set;
 import org.apache.lucene.codecs.CodecUtil;
 import org.apache.lucene.codecs.DocValuesConsumer;
@@ -54,6 +52,7 @@
 import org.apache.lucene.util.MathUtil;
 import org.apache.lucene.util.StringHelper;
 import org.apache.lucene.util.compress.LZ4;
+import org.apache.lucene.util.hppc.LongIntHashMap;
 import org.apache.lucene.util.packed.DirectMonotonicWriter;
 import org.apache.lucene.util.packed.DirectWriter;
 
@@ -273,7 +272,7 @@ private long[] writeValues(FieldInfo field, DocValuesProducer valuesProducer, bo
     meta.writeLong(numValues);
     final int numBitsPerValue;
     boolean doBlocks = false;
-    Map<Long, Integer> encode = null;
+    LongIntHashMap encode = null;
     if (min >= max) { // meta[-1]: All values are 0
       numBitsPerValue = 0;
       meta.writeInt(-1); // tablesize
@@ -289,7 +288,7 @@ private long[] writeValues(FieldInfo field, DocValuesProducer valuesProducer, bo
         for (Long v : sortedUniqueValues) {
           meta.writeLong(v); // table[] entry
         }
-        encode = new HashMap<>();
+        encode = new LongIntHashMap();
         for (int i = 0; i < sortedUniqueValues.length; ++i) {
           encode.put(sortedUniqueValues[i], i);
         }
@@ -339,7 +338,7 @@ private void writeValuesSingleBlock(
       int numBitsPerValue,
       long min,
       long gcd,
-      Map<Long, Integer> encode)
+      LongIntHashMap encode)
       throws IOException {
     DirectWriter writer = DirectWriter.getInstance(data, numValues, numBitsPerValue);
     for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {

diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
@@ -91,6 +91,8 @@
 import org.apache.lucene.util.ThreadInterruptedException;
 import org.apache.lucene.util.UnicodeUtil;
 import org.apache.lucene.util.Version;
+import org.apache.lucene.util.hppc.LongObjectHashMap;
+import org.apache.lucene.util.hppc.ObjectCursor;
 
 /**
  * An <code>IndexWriter</code> creates and maintains an index.
@@ -4387,7 +4389,7 @@ private synchronized ReadersAndUpdates commitMergedDeletesAndUpdates(
     final ReadersAndUpdates mergedDeletesAndUpdates = getPooledInstance(merge.info, true);
     int numDeletesBefore = mergedDeletesAndUpdates.getDelCount();
     // field -> delGen -> dv field updates
-    Map<String, Map<Long, DocValuesFieldUpdates>> mappedDVUpdates = new HashMap<>();
+    Map<String, LongObjectHashMap<DocValuesFieldUpdates>> mappedDVUpdates = new HashMap<>();
 
     boolean anyDVUpdates = false;
 
@@ -4420,9 +4422,9 @@ private synchronized ReadersAndUpdates commitMergedDeletesAndUpdates(
 
         String field = ent.getKey();
 
-        Map<Long, DocValuesFieldUpdates> mappedField = mappedDVUpdates.get(field);
+        LongObjectHashMap<DocValuesFieldUpdates> mappedField = mappedDVUpdates.get(field);
         if (mappedField == null) {
-          mappedField = new HashMap<>();
+          mappedField = new LongObjectHashMap<>();
           mappedDVUpdates.put(field, mappedField);
         }
 
@@ -4478,10 +4480,10 @@ private synchronized ReadersAndUpdates commitMergedDeletesAndUpdates(
 
     if (anyDVUpdates) {
       // Persist the merged DV updates onto the RAU for the merged segment:
-      for (Map<Long, DocValuesFieldUpdates> d : mappedDVUpdates.values()) {
-        for (DocValuesFieldUpdates updates : d.values()) {
-          updates.finish();
-          mergedDeletesAndUpdates.addDVUpdate(updates);
+      for (LongObjectHashMap<DocValuesFieldUpdates> d : mappedDVUpdates.values()) {
+        for (ObjectCursor<DocValuesFieldUpdates> updates : d.values()) {
+          updates.value.finish();
+          mergedDeletesAndUpdates.addDVUpdate(updates.value);
         }
       }
     }

diff --git a/lucene/core/src/java/org/apache/lucene/index/SegmentDocValues.java b/lucene/core/src/java/org/apache/lucene/index/SegmentDocValues.java
@@ -17,23 +17,23 @@
 package org.apache.lucene.index;
 
 import java.io.IOException;
-import java.util.HashMap;
 import java.util.List;
-import java.util.Map;
 import org.apache.lucene.codecs.DocValuesFormat;
 import org.apache.lucene.codecs.DocValuesProducer;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IOContext;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.RefCount;
+import org.apache.lucene.util.hppc.LongObjectHashMap;
 
 /**
  * Manages the {@link DocValuesProducer} held by {@link SegmentReader} and keeps track of their
  * reference counting.
  */
 final class SegmentDocValues {
 
-  private final Map<Long, RefCount<DocValuesProducer>> genDVProducers = new HashMap<>();
+  private final LongObjectHashMap<RefCount<DocValuesProducer>> genDVProducers =
+      new LongObjectHashMap<>();
 
   private RefCount<DocValuesProducer> newDocValuesProducer(
       SegmentCommitInfo si, Directory dir, final Long gen, FieldInfos infos) throws IOException {

diff --git a/lucene/core/src/java/org/apache/lucene/util/hppc/HashContainers.java b/lucene/core/src/java/org/apache/lucene/util/hppc/HashContainers.java
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.util.hppc;
+
+import java.util.concurrent.atomic.AtomicInteger;
+
+/** Constants for primitive maps. */
+public final class HashContainers {
+
+  /** Default expected number of elements. */
+  public static final int DEFAULT_EXPECTED_ELEMENTS = 4;
+
+  /** Default load factor. */
+  public static final float DEFAULT_LOAD_FACTOR = 0.75f;
+
+  /** Minimal sane load factor (99 empty slots per 100). */
+  public static final float MIN_LOAD_FACTOR = 1 / 100.0f;
+
+  /** Maximum sane load factor (1 empty slot per 100). */
+  public static final float MAX_LOAD_FACTOR = 99 / 100.0f;
+
+  /** Minimum hash buffer size. */
+  public static final int MIN_HASH_ARRAY_LENGTH = 4;
+
+  /**
+   * Maximum array size for hash containers (power-of-two and still allocable in Java, not a
+   * negative int).
+   */
+  public static final int MAX_HASH_ARRAY_LENGTH = 0x80000000 >>> 1;
+
+  /** Seed counter shared by the hash containers of this package. */
+  static final AtomicInteger ITERATION_SEED = new AtomicInteger();
+
+  /** Holds static constants only; not meant to be instantiated. */
+  private HashContainers() {}
+}
diff --git a/lucene/core/src/java/org/apache/lucene/util/hppc/IntIntHashMap.java b/lucene/core/src/java/org/apache/lucene/util/hppc/IntIntHashMap.java
@@ -18,10 +18,12 @@
 package org.apache.lucene.util.hppc;
 
 import static org.apache.lucene.util.BitUtil.nextHighestPowerOfTwo;
+import static org.apache.lucene.util.hppc.HashContainers.*;
 
 import java.util.Arrays;
 import java.util.Iterator;
-import java.util.concurrent.atomic.AtomicInteger;
+import org.apache.lucene.util.Accountable;
+import org.apache.lucene.util.RamUsageEstimator;
 
 /**
  * A hash map of <code>int</code> to <code>int</code>, implemented using open addressing with linear
@@ -31,28 +33,10 @@
  *
  * <p>github: https://github.com/carrotsearch/hppc release 0.9.0
  */
-public class IntIntHashMap implements Iterable<IntIntHashMap.IntIntCursor>, Cloneable {
+public class IntIntHashMap implements Iterable<IntIntHashMap.IntIntCursor>, Accountable, Cloneable {
 
-  public static final int DEFAULT_EXPECTED_ELEMENTS = 4;
-
-  public static final float DEFAULT_LOAD_FACTOR = 0.75f;
-
-  private static final AtomicInteger ITERATION_SEED = new AtomicInteger();
-
-  /** Minimal sane load factor (99 empty slots per 100). */
-  public static final float MIN_LOAD_FACTOR = 1 / 100.0f;
-
-  /** Maximum sane load factor (1 empty slot per 100). */
-  public static final float MAX_LOAD_FACTOR = 99 / 100.0f;
-
-  /** Minimum hash buffer size. */
-  public static final int MIN_HASH_ARRAY_LENGTH = 4;
-
-  /**
-   * Maximum array size for hash containers (power-of-two and still allocable in Java, not a
-   * negative int).
-   */
-  public static final int MAX_HASH_ARRAY_LENGTH = 0x80000000 >>> 1;
+  private static final long BASE_RAM_BYTES_USED =
+      RamUsageEstimator.shallowSizeOfInstance(IntIntHashMap.class);
 
   /** The array holding keys. */
   public int[] keys;
@@ -463,6 +447,11 @@ protected int nextIterationSeed() {
     return iterationSeed = BitMixer.mixPhi(iterationSeed);
   }
 
+  @Override
+  public long ramBytesUsed() { // shallow instance size plus the sizes of keys and values
+    return BASE_RAM_BYTES_USED + RamUsageEstimator.sizeOf(keys) + RamUsageEstimator.sizeOf(values);
+  }
+
   /** An iterator implementation for {@link #iterator}. */
   private final class EntryIterator extends AbstractIterator<IntIntCursor> {
     private final IntIntCursor cursor;

diff --git a/lucene/core/src/java/org/apache/lucene/util/hppc/IntObjectHashMap.java b/lucene/core/src/java/org/apache/lucene/util/hppc/IntObjectHashMap.java
@@ -18,43 +18,27 @@
 package org.apache.lucene.util.hppc;
 
 import static org.apache.lucene.util.BitUtil.nextHighestPowerOfTwo;
+import static org.apache.lucene.util.hppc.HashContainers.*;
 
 import java.util.Arrays;
 import java.util.Iterator;
-import java.util.concurrent.atomic.AtomicInteger;
+import org.apache.lucene.util.Accountable;
+import org.apache.lucene.util.RamUsageEstimator;
 
 /**
  * A hash map of <code>int</code> to <code>Object</code>, implemented using open addressing with
- * linear probing for collision resolution.
+ * linear probing for collision resolution. Supports null values.
  *
  * <p>Mostly forked and trimmed from com.carrotsearch.hppc.IntObjectHashMap
  *
  * <p>github: https://github.com/carrotsearch/hppc release 0.9.0
  */
 @SuppressWarnings("unchecked")
 public class IntObjectHashMap<VType>
-    implements Iterable<IntObjectHashMap.IntObjectCursor<VType>>, Cloneable {
+    implements Iterable<IntObjectHashMap.IntObjectCursor<VType>>, Accountable, Cloneable {
 
-  public static final int DEFAULT_EXPECTED_ELEMENTS = 4;
-
-  public static final float DEFAULT_LOAD_FACTOR = 0.75f;
-
-  private static final AtomicInteger ITERATION_SEED = new AtomicInteger();
-
-  /** Minimal sane load factor (99 empty slots per 100). */
-  public static final float MIN_LOAD_FACTOR = 1 / 100.0f;
-
-  /** Maximum sane load factor (1 empty slot per 100). */
-  public static final float MAX_LOAD_FACTOR = 99 / 100.0f;
-
-  /** Minimum hash buffer size. */
-  public static final int MIN_HASH_ARRAY_LENGTH = 4;
-
-  /**
-   * Maximum array size for hash containers (power-of-two and still allocable in Java, not a
-   * negative int).
-   */
-  public static final int MAX_HASH_ARRAY_LENGTH = 0x80000000 >>> 1;
+  private static final long BASE_RAM_BYTES_USED =
+      RamUsageEstimator.shallowSizeOfInstance(IntObjectHashMap.class);
 
   /** The array holding keys. */
   public int[] keys;
@@ -304,7 +288,7 @@ public VType indexGet(int index) {
     return (VType) values[index];
   }
 
-  public VType indexReplace(int index, int newValue) {
+  public VType indexReplace(int index, VType newValue) {
     assert index >= 0 : "The index must point at an existing key.";
     assert index <= mask || (index == mask + 1 && hasEmptyKey);
 
@@ -436,6 +420,19 @@ public Iterator<IntObjectCursor<VType>> iterator() {
     return new EntryIterator();
   }
 
+  @Override
+  public long ramBytesUsed() { // shallow instance size, plus keys, plus sizeOfValues()
+    return BASE_RAM_BYTES_USED + RamUsageEstimator.sizeOf(keys) + sizeOfValues();
+  }
+
+  private long sizeOfValues() {
+    long size = RamUsageEstimator.shallowSizeOf(values); // the values array itself
+    for (ObjectCursor<VType> cursor : values()) {
+      size += RamUsageEstimator.sizeOfObject(cursor.value); // the stored value, not the cursor
+    }
+    return size;
+  }
+
   /** An iterator implementation for {@link #iterator}. */
   private final class EntryIterator extends AbstractIterator<IntObjectCursor<VType>> {
     private final IntObjectCursor<VType> cursor;
@@ -869,21 +866,4 @@ public String toString() {
       return "[cursor, index: " + index + ", key: " + key + ", value: " + value + "]";
     }
   }
-
-  /** Forked from HPPC, holding int index and Object value */
-  public static final class ObjectCursor<VType> {
-    /**
-     * The current value's index in the container this cursor belongs to. The meaning of this index
-     * is defined by the container (usually it will be an index in the underlying storage buffer).
-     */
-    public int index;
-
-    /** The current value. */
-    public VType value;
-
-    @Override
-    public String toString() {
-      return "[cursor, index: " + index + ", value: " + value + "]";
-    }
-  }
 }
diff --git a/lucene/core/src/java/org/apache/lucene/util/hppc/LongCursor.java b/lucene/core/src/java/org/apache/lucene/util/hppc/LongCursor.java
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.util.hppc;
+
+/** Cursor forked from HPPC that pairs a long value with its int index. */
+public final class LongCursor {
+  /**
+   * Index of the current value inside the container owning this cursor. What the index means is
+   * up to the container (usually it will be an index in the underlying storage buffer).
+   */
+  public int index;
+
+  /** The value currently pointed at. */
+  public long value;
+
+  @Override
+  public String toString() {
+    StringBuilder text = new StringBuilder("[cursor, index: ").append(index);
+    return text.append(", value: ").append(value).append("]").toString();
+  }
+}