Skip to content

Commit

Permalink
LUCENE-9387: Remove CodecReader#ramBytesUsed. (#79)
Browse files Browse the repository at this point in the history
This commit removes `ramBytesUsed()` from `CodecReader` and all file formats
besides vectors, which is the only remaining file format that might use lots of
memory in the default codec. I left `ramBytesUsed()` on the `completion` format
too, which is another feature that could use lots of memory.

Other components that relied on being able to compute the memory usage of readers —
such as facets' TaxonomyReader and the analyzing suggester — now assume that readers
have a RAM usage of 0.
  • Loading branch information
jpountz committed Apr 14, 2021
1 parent fbbdc62 commit 79f14b1
Show file tree
Hide file tree
Showing 91 changed files with 65 additions and 1,320 deletions.
2 changes: 2 additions & 0 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,8 @@ API Changes
* LUCENE-9853: JapaneseAnalyzer should use CJKWidthCharFilter for full-width and half-width character normalization.
(Tomoko Uchida)

* LUCENE-9387: Removed CodecReader#ramBytesUsed. (Adrien Grand)

Improvements

* LUCENE-9687: Hunspell support improvements: add API for spell-checking and suggestions, support compound words,
Expand Down
5 changes: 5 additions & 0 deletions lucene/MIGRATE.md
Original file line number Diff line number Diff line change
Expand Up @@ -376,3 +376,8 @@ better to use the ordinal alone (integer-based data structures) for per-document
call lookupOrd() a few times at the end (e.g. for the hits you want to display). Otherwise, if you
really don't want per-document ordinals, but instead a per-document `byte[]`, use a BinaryDocValues
field.

## Removed CodecReader.ramBytesUsed() (LUCENE-9387)

Lucene index readers now use so little memory with the default codec that
it was decided to remove the ability to estimate their RAM usage.
Original file line number Diff line number Diff line change
Expand Up @@ -17,18 +17,13 @@
package org.apache.lucene.backward_codecs.lucene40.blocktree;

import java.io.IOException;
import java.util.Collection;
import java.util.Collections;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.Accountables;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.automaton.CompiledAutomaton;
import org.apache.lucene.util.fst.ByteSequenceOutputs;
import org.apache.lucene.util.fst.FST;
Expand All @@ -39,14 +34,10 @@
*
* @lucene.internal
*/
public final class FieldReader extends Terms implements Accountable {
public final class FieldReader extends Terms {

// private final boolean DEBUG = BlockTreeTermsWriter.DEBUG;

private static final long BASE_RAM_BYTES_USED =
RamUsageEstimator.shallowSizeOfInstance(FieldReader.class)
+ 3 * RamUsageEstimator.shallowSizeOfInstance(BytesRef.class);

final long numTerms;
final FieldInfo fieldInfo;
final long sumTotalTermFreq;
Expand Down Expand Up @@ -200,20 +191,6 @@ public TermsEnum intersect(CompiledAutomaton compiled, BytesRef startTerm) throw
this, compiled.automaton, compiled.runAutomaton, compiled.commonSuffixRef, startTerm);
}

@Override
public long ramBytesUsed() {
return BASE_RAM_BYTES_USED + ((index != null) ? index.ramBytesUsed() : 0);
}

@Override
public Collection<Accountable> getChildResources() {
if (index == null) {
return Collections.emptyList();
} else {
return Collections.singleton(Accountables.namedAccountable("term index", index));
}
}

@Override
public String toString() {
return "BlockTreeTerms(seg="
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
Expand All @@ -35,8 +34,6 @@
import org.apache.lucene.index.Terms;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.Accountables;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.fst.ByteSequenceOutputs;
Expand Down Expand Up @@ -373,23 +370,6 @@ String brToString(BytesRef b) {
}
}

@Override
public long ramBytesUsed() {
long sizeInBytes = postingsReader.ramBytesUsed();
for (FieldReader reader : fieldMap.values()) {
sizeInBytes += reader.ramBytesUsed();
}
return sizeInBytes;
}

@Override
public Collection<Accountable> getChildResources() {
List<Accountable> resources =
new ArrayList<>(Accountables.namedAccountables("field", fieldMap));
resources.add(Accountables.namedAccountable("delegate", postingsReader));
return Collections.unmodifiableList(resources);
}

@Override
public void checkIntegrity() throws IOException {
// terms index
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.RamUsageEstimator;

/**
* Concrete class that reads docId(maybe frq,pos,offset,payloads) list with postings format.
Expand All @@ -55,9 +54,6 @@
*/
public final class Lucene50PostingsReader extends PostingsReaderBase {

private static final long BASE_RAM_BYTES_USED =
RamUsageEstimator.shallowSizeOfInstance(Lucene50PostingsReader.class);

private final IndexInput docIn;
private final IndexInput posIn;
private final IndexInput payIn;
Expand Down Expand Up @@ -1844,11 +1840,6 @@ public long cost() {
}
}

@Override
public long ramBytesUsed() {
return BASE_RAM_BYTES_USED;
}

@Override
public void checkIntegrity() throws IOException {
if (docIn != null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,8 @@

import java.io.Closeable;
import java.io.IOException;
import org.apache.lucene.util.Accountable;

abstract class FieldsIndex implements Accountable, Cloneable, Closeable {
abstract class FieldsIndex implements Cloneable, Closeable {

/** Get the start pointer for the block that contains the given docID. */
abstract long getStartPointer(int docID);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,17 +25,13 @@
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.RandomAccessInput;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.packed.DirectMonotonicReader;

final class FieldsIndexReader extends FieldsIndex {

static final int VERSION_START = 0;
static final int VERSION_CURRENT = 0;

private static final long BASE_RAM_BYTES_USED =
RamUsageEstimator.shallowSizeOfInstance(FieldsIndexReader.class);

private final int maxDoc;
private final int blockShift;
private final int numChunks;
Expand Down Expand Up @@ -111,15 +107,6 @@ private FieldsIndexReader(FieldsIndexReader other) throws IOException {
startPointers = DirectMonotonicReader.getInstance(startPointersMeta, startPointersSlice);
}

@Override
public long ramBytesUsed() {
return BASE_RAM_BYTES_USED
+ docsMeta.ramBytesUsed()
+ startPointersMeta.ramBytesUsed()
+ docs.ramBytesUsed()
+ startPointers.ramBytesUsed();
}

@Override
public void close() throws IOException {
indexInput.close();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,24 +19,14 @@
import static org.apache.lucene.util.BitUtil.zigZagDecode;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.Accountables;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.packed.PackedInts;

final class LegacyFieldsIndexReader extends FieldsIndex {

private static final long BASE_RAM_BYTES_USED =
RamUsageEstimator.shallowSizeOfInstance(LegacyFieldsIndexReader.class);

final int maxDoc;
final int[] docBases;
final long[] startPointers;
Expand Down Expand Up @@ -177,46 +167,6 @@ public LegacyFieldsIndexReader clone() {
return this;
}

@Override
public long ramBytesUsed() {
long res = BASE_RAM_BYTES_USED;

res += RamUsageEstimator.shallowSizeOf(docBasesDeltas);
for (PackedInts.Reader r : docBasesDeltas) {
res += r.ramBytesUsed();
}
res += RamUsageEstimator.shallowSizeOf(startPointersDeltas);
for (PackedInts.Reader r : startPointersDeltas) {
res += r.ramBytesUsed();
}

res += RamUsageEstimator.sizeOf(docBases);
res += RamUsageEstimator.sizeOf(startPointers);
res += RamUsageEstimator.sizeOf(avgChunkDocs);
res += RamUsageEstimator.sizeOf(avgChunkSizes);

return res;
}

@Override
public Collection<Accountable> getChildResources() {
List<Accountable> resources = new ArrayList<>();

long docBaseDeltaBytes = RamUsageEstimator.shallowSizeOf(docBasesDeltas);
for (PackedInts.Reader r : docBasesDeltas) {
docBaseDeltaBytes += r.ramBytesUsed();
}
resources.add(Accountables.namedAccountable("doc base deltas", docBaseDeltaBytes));

long startPointerDeltaBytes = RamUsageEstimator.shallowSizeOf(startPointersDeltas);
for (PackedInts.Reader r : startPointersDeltas) {
startPointerDeltaBytes += r.ramBytesUsed();
}
resources.add(Accountables.namedAccountable("start pointer deltas", startPointerDeltaBytes));

return Collections.unmodifiableList(resources);
}

@Override
public String toString() {
return getClass().getSimpleName() + "(blocks=" + docBases.length + ")";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,6 @@
import java.io.EOFException;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.StoredFieldsReader;
import org.apache.lucene.codecs.compressing.CompressionMode;
Expand All @@ -39,8 +37,6 @@
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.Accountables;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BitUtil;
import org.apache.lucene.util.BytesRef;
Expand Down Expand Up @@ -791,16 +787,6 @@ int getPackedIntsVersion() {
return packedIntsVersion;
}

@Override
public long ramBytesUsed() {
return indexReader.ramBytesUsed();
}

@Override
public Collection<Accountable> getChildResources() {
return Collections.singleton(Accountables.namedAccountable("stored field index", indexReader));
}

@Override
public void checkIntegrity() throws IOException {
indexReader.checkIntegrity();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,6 @@
package org.apache.lucene.backward_codecs.lucene50.compressing;

import java.io.IOException;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.NoSuchElementException;
import org.apache.lucene.codecs.CodecUtil;
Expand All @@ -43,8 +41,6 @@
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.Accountables;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
Expand Down Expand Up @@ -1337,16 +1333,6 @@ private static int sum(int[] arr) {
return sum;
}

@Override
public long ramBytesUsed() {
return indexReader.ramBytesUsed();
}

@Override
public Collection<Accountable> getChildResources() {
return Collections.singleton(Accountables.namedAccountable("term vector index", indexReader));
}

@Override
public void checkIntegrity() throws IOException {
indexReader.checkIntegrity();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -130,11 +130,6 @@ public PointValues getValues(String fieldName) {
return readers.get(fieldInfo.number);
}

@Override
public long ramBytesUsed() {
return 0L;
}

@Override
public void checkIntegrity() throws IOException {
CodecUtil.checksumEntireFile(dataIn);
Expand Down

0 comments on commit 79f14b1

Please sign in to comment.