Skip to content

Commit

Permalink
Replace Map<Long, Object> by primitive LongObjectHashMap. (#13392)
Browse files Browse the repository at this point in the history
Add LongObjectHashMap and replace Map<Long, Object>.
Add LongIntHashMap and replace Map<Long, Integer>.
Add HPPC dependency to join and spatial modules for primitive values float and double.
  • Loading branch information
bruno-roustant committed May 23, 2024
1 parent f07038d commit 816db79
Show file tree
Hide file tree
Showing 26 changed files with 3,280 additions and 265 deletions.
2 changes: 2 additions & 0 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,8 @@ Optimizations

* GITHUB#13368: Replace Map<Integer, Object> by primitive IntObjectHashMap. (Bruno Roustant)

* GITHUB#13392: Replace Map<Long, Object> by primitive LongObjectHashMap. (Bruno Roustant)

Bug Fixes
---------------------

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import java.util.List;
import org.apache.lucene.analysis.cn.smart.Utility;
import org.apache.lucene.util.hppc.IntObjectHashMap;
import org.apache.lucene.util.hppc.ObjectCursor;

/**
* Graph representing possible token pairs (bigrams) at each start offset in the sentence.
Expand Down Expand Up @@ -218,8 +219,7 @@ public List<SegToken> getShortPath() {
@Override
public String toString() {
StringBuilder sb = new StringBuilder();
for (IntObjectHashMap.ObjectCursor<ArrayList<SegTokenPair>> segList :
tokenPairListTable.values()) {
for (ObjectCursor<ArrayList<SegTokenPair>> segList : tokenPairListTable.values()) {
for (SegTokenPair pair : segList.value) {
sb.append(pair).append("\n");
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,7 @@

import java.io.IOException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.DocValuesConsumer;
Expand Down Expand Up @@ -54,6 +52,7 @@
import org.apache.lucene.util.MathUtil;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.compress.LZ4;
import org.apache.lucene.util.hppc.LongIntHashMap;
import org.apache.lucene.util.packed.DirectMonotonicWriter;
import org.apache.lucene.util.packed.DirectWriter;

Expand Down Expand Up @@ -273,7 +272,7 @@ private long[] writeValues(FieldInfo field, DocValuesProducer valuesProducer, bo
meta.writeLong(numValues);
final int numBitsPerValue;
boolean doBlocks = false;
Map<Long, Integer> encode = null;
LongIntHashMap encode = null;
if (min >= max) { // meta[-1]: All values are 0
numBitsPerValue = 0;
meta.writeInt(-1); // tablesize
Expand All @@ -289,7 +288,7 @@ private long[] writeValues(FieldInfo field, DocValuesProducer valuesProducer, bo
for (Long v : sortedUniqueValues) {
meta.writeLong(v); // table[] entry
}
encode = new HashMap<>();
encode = new LongIntHashMap();
for (int i = 0; i < sortedUniqueValues.length; ++i) {
encode.put(sortedUniqueValues[i], i);
}
Expand Down Expand Up @@ -339,7 +338,7 @@ private void writeValuesSingleBlock(
int numBitsPerValue,
long min,
long gcd,
Map<Long, Integer> encode)
LongIntHashMap encode)
throws IOException {
DirectWriter writer = DirectWriter.getInstance(data, numValues, numBitsPerValue);
for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
Expand Down
16 changes: 9 additions & 7 deletions lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,8 @@
import org.apache.lucene.util.ThreadInterruptedException;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util.Version;
import org.apache.lucene.util.hppc.LongObjectHashMap;
import org.apache.lucene.util.hppc.ObjectCursor;

/**
* An <code>IndexWriter</code> creates and maintains an index.
Expand Down Expand Up @@ -4387,7 +4389,7 @@ private synchronized ReadersAndUpdates commitMergedDeletesAndUpdates(
final ReadersAndUpdates mergedDeletesAndUpdates = getPooledInstance(merge.info, true);
int numDeletesBefore = mergedDeletesAndUpdates.getDelCount();
// field -> delGen -> dv field updates
Map<String, Map<Long, DocValuesFieldUpdates>> mappedDVUpdates = new HashMap<>();
Map<String, LongObjectHashMap<DocValuesFieldUpdates>> mappedDVUpdates = new HashMap<>();

boolean anyDVUpdates = false;

Expand Down Expand Up @@ -4420,9 +4422,9 @@ private synchronized ReadersAndUpdates commitMergedDeletesAndUpdates(

String field = ent.getKey();

Map<Long, DocValuesFieldUpdates> mappedField = mappedDVUpdates.get(field);
LongObjectHashMap<DocValuesFieldUpdates> mappedField = mappedDVUpdates.get(field);
if (mappedField == null) {
mappedField = new HashMap<>();
mappedField = new LongObjectHashMap<>();
mappedDVUpdates.put(field, mappedField);
}

Expand Down Expand Up @@ -4478,10 +4480,10 @@ private synchronized ReadersAndUpdates commitMergedDeletesAndUpdates(

if (anyDVUpdates) {
// Persist the merged DV updates onto the RAU for the merged segment:
for (Map<Long, DocValuesFieldUpdates> d : mappedDVUpdates.values()) {
for (DocValuesFieldUpdates updates : d.values()) {
updates.finish();
mergedDeletesAndUpdates.addDVUpdate(updates);
for (LongObjectHashMap<DocValuesFieldUpdates> d : mappedDVUpdates.values()) {
for (ObjectCursor<DocValuesFieldUpdates> updates : d.values()) {
updates.value.finish();
mergedDeletesAndUpdates.addDVUpdate(updates.value);
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,23 +17,23 @@
package org.apache.lucene.index;

import java.io.IOException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.RefCount;
import org.apache.lucene.util.hppc.LongObjectHashMap;

/**
* Manages the {@link DocValuesProducer} held by {@link SegmentReader} and keeps track of their
* reference counting.
*/
final class SegmentDocValues {

private final Map<Long, RefCount<DocValuesProducer>> genDVProducers = new HashMap<>();
private final LongObjectHashMap<RefCount<DocValuesProducer>> genDVProducers =
new LongObjectHashMap<>();

private RefCount<DocValuesProducer> newDocValuesProducer(
SegmentCommitInfo si, Directory dir, final Long gen, FieldInfos infos) throws IOException {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.lucene.util.hppc;

import java.util.concurrent.atomic.AtomicInteger;

/**
 * Constants for primitive maps.
 *
 * <p>This class only holds static constants; it is final and cannot be instantiated.
 */
public final class HashContainers {

  /** Default expected number of elements. */
  public static final int DEFAULT_EXPECTED_ELEMENTS = 4;

  /** Default load factor. */
  public static final float DEFAULT_LOAD_FACTOR = 0.75f;

  /** Minimal sane load factor (99 empty slots per 100). */
  public static final float MIN_LOAD_FACTOR = 1 / 100.0f;

  /** Maximum sane load factor (1 empty slot per 100). */
  public static final float MAX_LOAD_FACTOR = 99 / 100.0f;

  /** Minimum hash buffer size. */
  public static final int MIN_HASH_ARRAY_LENGTH = 4;

  /**
   * Maximum array size for hash containers (power-of-two and still allocable in Java, not a
   * negative int). The expression evaluates to {@code 0x40000000}, i.e. {@code 1 << 30}.
   */
  public static final int MAX_HASH_ARRAY_LENGTH = 0x80000000 >>> 1;

  /**
   * Counter shared by the hash containers of this package.
   *
   * <p>NOTE(review): presumably the source from which each container derives its iteration seed;
   * confirm against the containers' usage.
   */
  static final AtomicInteger ITERATION_SEED = new AtomicInteger();

  /** Not instantiable: this class is a holder of static constants only. */
  private HashContainers() {}
}
33 changes: 11 additions & 22 deletions lucene/core/src/java/org/apache/lucene/util/hppc/IntIntHashMap.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,12 @@
package org.apache.lucene.util.hppc;

import static org.apache.lucene.util.BitUtil.nextHighestPowerOfTwo;
import static org.apache.lucene.util.hppc.HashContainers.*;

import java.util.Arrays;
import java.util.Iterator;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.RamUsageEstimator;

/**
* A hash map of <code>int</code> to <code>int</code>, implemented using open addressing with linear
Expand All @@ -31,28 +33,10 @@
*
* <p>github: https://github.com/carrotsearch/hppc release 0.9.0
*/
public class IntIntHashMap implements Iterable<IntIntHashMap.IntIntCursor>, Cloneable {
public class IntIntHashMap implements Iterable<IntIntHashMap.IntIntCursor>, Accountable, Cloneable {

public static final int DEFAULT_EXPECTED_ELEMENTS = 4;

public static final float DEFAULT_LOAD_FACTOR = 0.75f;

private static final AtomicInteger ITERATION_SEED = new AtomicInteger();

/** Minimal sane load factor (99 empty slots per 100). */
public static final float MIN_LOAD_FACTOR = 1 / 100.0f;

/** Maximum sane load factor (1 empty slot per 100). */
public static final float MAX_LOAD_FACTOR = 99 / 100.0f;

/** Minimum hash buffer size. */
public static final int MIN_HASH_ARRAY_LENGTH = 4;

/**
* Maximum array size for hash containers (power-of-two and still allocable in Java, not a
* negative int).
*/
public static final int MAX_HASH_ARRAY_LENGTH = 0x80000000 >>> 1;
private static final long BASE_RAM_BYTES_USED =
RamUsageEstimator.shallowSizeOfInstance(IntIntHashMap.class);

/** The array holding keys. */
public int[] keys;
Expand Down Expand Up @@ -463,6 +447,11 @@ protected int nextIterationSeed() {
return iterationSeed = BitMixer.mixPhi(iterationSeed);
}

/**
 * Estimates the memory used by this map: the shallow size of the instance plus the sizes of the
 * key and value arrays.
 */
@Override
public long ramBytesUsed() {
  long bytes = BASE_RAM_BYTES_USED;
  bytes += RamUsageEstimator.sizeOf(keys);
  bytes += RamUsageEstimator.sizeOf(values);
  return bytes;
}

/** An iterator implementation for {@link #iterator}. */
private final class EntryIterator extends AbstractIterator<IntIntCursor> {
private final IntIntCursor cursor;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,43 +18,27 @@
package org.apache.lucene.util.hppc;

import static org.apache.lucene.util.BitUtil.nextHighestPowerOfTwo;
import static org.apache.lucene.util.hppc.HashContainers.*;

import java.util.Arrays;
import java.util.Iterator;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.RamUsageEstimator;

/**
* A hash map of <code>int</code> to <code>Object</code>, implemented using open addressing with
* linear probing for collision resolution.
* linear probing for collision resolution. Supports null values.
*
* <p>Mostly forked and trimmed from com.carrotsearch.hppc.IntObjectHashMap
*
* <p>github: https://github.com/carrotsearch/hppc release 0.9.0
*/
@SuppressWarnings("unchecked")
public class IntObjectHashMap<VType>
implements Iterable<IntObjectHashMap.IntObjectCursor<VType>>, Cloneable {
implements Iterable<IntObjectHashMap.IntObjectCursor<VType>>, Accountable, Cloneable {

public static final int DEFAULT_EXPECTED_ELEMENTS = 4;

public static final float DEFAULT_LOAD_FACTOR = 0.75f;

private static final AtomicInteger ITERATION_SEED = new AtomicInteger();

/** Minimal sane load factor (99 empty slots per 100). */
public static final float MIN_LOAD_FACTOR = 1 / 100.0f;

/** Maximum sane load factor (1 empty slot per 100). */
public static final float MAX_LOAD_FACTOR = 99 / 100.0f;

/** Minimum hash buffer size. */
public static final int MIN_HASH_ARRAY_LENGTH = 4;

/**
* Maximum array size for hash containers (power-of-two and still allocable in Java, not a
* negative int).
*/
public static final int MAX_HASH_ARRAY_LENGTH = 0x80000000 >>> 1;
private static final long BASE_RAM_BYTES_USED =
RamUsageEstimator.shallowSizeOfInstance(IntObjectHashMap.class);

/** The array holding keys. */
public int[] keys;
Expand Down Expand Up @@ -304,7 +288,7 @@ public VType indexGet(int index) {
return (VType) values[index];
}

public VType indexReplace(int index, int newValue) {
public VType indexReplace(int index, VType newValue) {
assert index >= 0 : "The index must point at an existing key.";
assert index <= mask || (index == mask + 1 && hasEmptyKey);

Expand Down Expand Up @@ -436,6 +420,19 @@ public Iterator<IntObjectCursor<VType>> iterator() {
return new EntryIterator();
}

/**
 * Estimates the memory used by this map: the shallow size of the instance, the size of the key
 * array, and the size reported by {@code sizeOfValues()} for the values.
 */
@Override
public long ramBytesUsed() {
  final long keysBytes = RamUsageEstimator.sizeOf(keys);
  final long valuesBytes = sizeOfValues();
  return BASE_RAM_BYTES_USED + keysBytes + valuesBytes;
}

/**
 * Estimates the memory held by the values: the shallow size of the values array plus the
 * estimated size of each value returned by {@link #values()}.
 */
private long sizeOfValues() {
  long size = RamUsageEstimator.shallowSizeOf(values);
  for (ObjectCursor<VType> cursor : values()) {
    // Measure the stored value itself, not the iteration cursor that wraps it; otherwise every
    // entry would be accounted as the size of a cursor regardless of what it holds.
    size += RamUsageEstimator.sizeOfObject(cursor.value);
  }
  return size;
}

/** An iterator implementation for {@link #iterator}. */
private final class EntryIterator extends AbstractIterator<IntObjectCursor<VType>> {
private final IntObjectCursor<VType> cursor;
Expand Down Expand Up @@ -869,21 +866,4 @@ public String toString() {
return "[cursor, index: " + index + ", key: " + key + ", value: " + value + "]";
}
}

/** Forked from HPPC, holding int index and Object value */
public static final class ObjectCursor<VType> {
/**
* The current value's index in the container this cursor belongs to. The meaning of this index
* is defined by the container (usually it will be an index in the underlying storage buffer).
*/
public int index;

/** The current value. */
public VType value;

@Override
public String toString() {
return "[cursor, index: " + index + ", value: " + value + "]";
}
}
}
35 changes: 35 additions & 0 deletions lucene/core/src/java/org/apache/lucene/util/hppc/LongCursor.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.lucene.util.hppc;

/** Cursor forked from HPPC that exposes an {@code int} index together with a {@code long} value. */
public final class LongCursor {
  /**
   * Index of the current value inside the container that owns this cursor. The container defines
   * what the index means (usually it will be an index in the underlying storage buffer).
   */
  public int index;

  /** The value the cursor currently points at. */
  public long value;

  @Override
  public String toString() {
    StringBuilder text = new StringBuilder("[cursor, index: ");
    text.append(index);
    text.append(", value: ");
    text.append(value);
    text.append("]");
    return text.toString();
  }
}
Loading

0 comments on commit 816db79

Please sign in to comment.