Skip to content

Commit

Permalink
LUCENE-9387: Remove CodecReader#ramBytesUsed. (#79)
Browse files Browse the repository at this point in the history
This commit removes `ramBytesUsed()` from `CodecReader` and all file formats
besides vectors, which is the only remaining file format that might use lots of
memory in the default codec. I left `ramBytesUsed()` on the `completion` format
too, which is another feature that could use lots of memory.

Other components that relied on being able to compute the memory usage of readers —
such as facets' TaxonomyReader and the analyzing suggester — now assume that readers
have a RAM usage of 0.
  • Loading branch information
jpountz committed Apr 14, 2021
1 parent fbbdc62 commit 79f14b1
Show file tree
Hide file tree
Showing 91 changed files with 65 additions and 1,320 deletions.
2 changes: 2 additions & 0 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,8 @@ API Changes
* LUCENE-9853: JapaneseAnalyzer should use CJKWidthCharFilter for full-width and half-width character normalization.
(Tomoko Uchida)

* LUCENE-9387: Removed CodecReader#ramBytesUsed. (Adrien Grand)

Improvements

* LUCENE-9687: Hunspell support improvements: add API for spell-checking and suggestions, support compound words,
Expand Down
5 changes: 5 additions & 0 deletions lucene/MIGRATE.md
Original file line number Diff line number Diff line change
Expand Up @@ -376,3 +376,8 @@ better to use the ordinal alone (integer-based data structures) for per-document
call lookupOrd() a few times at the end (e.g. for the hits you want to display). Otherwise, if you
really don't want per-document ordinals, but instead a per-document `byte[]`, use a BinaryDocValues
field.

## Removed CodecReader.ramBytesUsed() (LUCENE-9387)

Lucene index readers now use so little memory with the default codec that
it was decided to remove the ability to estimate their RAM usage.
Original file line number Diff line number Diff line change
Expand Up @@ -17,18 +17,13 @@
package org.apache.lucene.backward_codecs.lucene40.blocktree;

import java.io.IOException;
import java.util.Collection;
import java.util.Collections;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.Accountables;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.automaton.CompiledAutomaton;
import org.apache.lucene.util.fst.ByteSequenceOutputs;
import org.apache.lucene.util.fst.FST;
Expand All @@ -39,14 +34,10 @@
*
* @lucene.internal
*/
public final class FieldReader extends Terms implements Accountable {
public final class FieldReader extends Terms {

// private final boolean DEBUG = BlockTreeTermsWriter.DEBUG;

private static final long BASE_RAM_BYTES_USED =
RamUsageEstimator.shallowSizeOfInstance(FieldReader.class)
+ 3 * RamUsageEstimator.shallowSizeOfInstance(BytesRef.class);

final long numTerms;
final FieldInfo fieldInfo;
final long sumTotalTermFreq;
Expand Down Expand Up @@ -200,20 +191,6 @@ public TermsEnum intersect(CompiledAutomaton compiled, BytesRef startTerm) throw
this, compiled.automaton, compiled.runAutomaton, compiled.commonSuffixRef, startTerm);
}

@Override
public long ramBytesUsed() {
return BASE_RAM_BYTES_USED + ((index != null) ? index.ramBytesUsed() : 0);
}

@Override
public Collection<Accountable> getChildResources() {
if (index == null) {
return Collections.emptyList();
} else {
return Collections.singleton(Accountables.namedAccountable("term index", index));
}
}

@Override
public String toString() {
return "BlockTreeTerms(seg="
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
Expand All @@ -35,8 +34,6 @@
import org.apache.lucene.index.Terms;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.Accountables;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.fst.ByteSequenceOutputs;
Expand Down Expand Up @@ -373,23 +370,6 @@ String brToString(BytesRef b) {
}
}

@Override
public long ramBytesUsed() {
long sizeInBytes = postingsReader.ramBytesUsed();
for (FieldReader reader : fieldMap.values()) {
sizeInBytes += reader.ramBytesUsed();
}
return sizeInBytes;
}

@Override
public Collection<Accountable> getChildResources() {
List<Accountable> resources =
new ArrayList<>(Accountables.namedAccountables("field", fieldMap));
resources.add(Accountables.namedAccountable("delegate", postingsReader));
return Collections.unmodifiableList(resources);
}

@Override
public void checkIntegrity() throws IOException {
// terms index
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.RamUsageEstimator;

/**
* Concrete class that reads docId(maybe frq,pos,offset,payloads) list with postings format.
Expand All @@ -55,9 +54,6 @@
*/
public final class Lucene50PostingsReader extends PostingsReaderBase {

private static final long BASE_RAM_BYTES_USED =
RamUsageEstimator.shallowSizeOfInstance(Lucene50PostingsReader.class);

private final IndexInput docIn;
private final IndexInput posIn;
private final IndexInput payIn;
Expand Down Expand Up @@ -1844,11 +1840,6 @@ public long cost() {
}
}

@Override
public long ramBytesUsed() {
return BASE_RAM_BYTES_USED;
}

@Override
public void checkIntegrity() throws IOException {
if (docIn != null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,8 @@

import java.io.Closeable;
import java.io.IOException;
import org.apache.lucene.util.Accountable;

abstract class FieldsIndex implements Accountable, Cloneable, Closeable {
abstract class FieldsIndex implements Cloneable, Closeable {

/** Get the start pointer for the block that contains the given docID. */
abstract long getStartPointer(int docID);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,17 +25,13 @@
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.RandomAccessInput;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.packed.DirectMonotonicReader;

final class FieldsIndexReader extends FieldsIndex {

static final int VERSION_START = 0;
static final int VERSION_CURRENT = 0;

private static final long BASE_RAM_BYTES_USED =
RamUsageEstimator.shallowSizeOfInstance(FieldsIndexReader.class);

private final int maxDoc;
private final int blockShift;
private final int numChunks;
Expand Down Expand Up @@ -111,15 +107,6 @@ private FieldsIndexReader(FieldsIndexReader other) throws IOException {
startPointers = DirectMonotonicReader.getInstance(startPointersMeta, startPointersSlice);
}

@Override
public long ramBytesUsed() {
return BASE_RAM_BYTES_USED
+ docsMeta.ramBytesUsed()
+ startPointersMeta.ramBytesUsed()
+ docs.ramBytesUsed()
+ startPointers.ramBytesUsed();
}

@Override
public void close() throws IOException {
indexInput.close();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,24 +19,14 @@
import static org.apache.lucene.util.BitUtil.zigZagDecode;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.Accountables;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.packed.PackedInts;

final class LegacyFieldsIndexReader extends FieldsIndex {

private static final long BASE_RAM_BYTES_USED =
RamUsageEstimator.shallowSizeOfInstance(LegacyFieldsIndexReader.class);

final int maxDoc;
final int[] docBases;
final long[] startPointers;
Expand Down Expand Up @@ -177,46 +167,6 @@ public LegacyFieldsIndexReader clone() {
return this;
}

@Override
public long ramBytesUsed() {
long res = BASE_RAM_BYTES_USED;

res += RamUsageEstimator.shallowSizeOf(docBasesDeltas);
for (PackedInts.Reader r : docBasesDeltas) {
res += r.ramBytesUsed();
}
res += RamUsageEstimator.shallowSizeOf(startPointersDeltas);
for (PackedInts.Reader r : startPointersDeltas) {
res += r.ramBytesUsed();
}

res += RamUsageEstimator.sizeOf(docBases);
res += RamUsageEstimator.sizeOf(startPointers);
res += RamUsageEstimator.sizeOf(avgChunkDocs);
res += RamUsageEstimator.sizeOf(avgChunkSizes);

return res;
}

@Override
public Collection<Accountable> getChildResources() {
List<Accountable> resources = new ArrayList<>();

long docBaseDeltaBytes = RamUsageEstimator.shallowSizeOf(docBasesDeltas);
for (PackedInts.Reader r : docBasesDeltas) {
docBaseDeltaBytes += r.ramBytesUsed();
}
resources.add(Accountables.namedAccountable("doc base deltas", docBaseDeltaBytes));

long startPointerDeltaBytes = RamUsageEstimator.shallowSizeOf(startPointersDeltas);
for (PackedInts.Reader r : startPointersDeltas) {
startPointerDeltaBytes += r.ramBytesUsed();
}
resources.add(Accountables.namedAccountable("start pointer deltas", startPointerDeltaBytes));

return Collections.unmodifiableList(resources);
}

@Override
public String toString() {
return getClass().getSimpleName() + "(blocks=" + docBases.length + ")";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,6 @@
import java.io.EOFException;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.StoredFieldsReader;
import org.apache.lucene.codecs.compressing.CompressionMode;
Expand All @@ -39,8 +37,6 @@
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.Accountables;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BitUtil;
import org.apache.lucene.util.BytesRef;
Expand Down Expand Up @@ -791,16 +787,6 @@ int getPackedIntsVersion() {
return packedIntsVersion;
}

@Override
public long ramBytesUsed() {
return indexReader.ramBytesUsed();
}

@Override
public Collection<Accountable> getChildResources() {
return Collections.singleton(Accountables.namedAccountable("stored field index", indexReader));
}

@Override
public void checkIntegrity() throws IOException {
indexReader.checkIntegrity();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,6 @@
package org.apache.lucene.backward_codecs.lucene50.compressing;

import java.io.IOException;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.NoSuchElementException;
import org.apache.lucene.codecs.CodecUtil;
Expand All @@ -43,8 +41,6 @@
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.Accountables;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
Expand Down Expand Up @@ -1337,16 +1333,6 @@ private static int sum(int[] arr) {
return sum;
}

@Override
public long ramBytesUsed() {
return indexReader.ramBytesUsed();
}

@Override
public Collection<Accountable> getChildResources() {
return Collections.singleton(Accountables.namedAccountable("term vector index", indexReader));
}

@Override
public void checkIntegrity() throws IOException {
indexReader.checkIntegrity();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -130,11 +130,6 @@ public PointValues getValues(String fieldName) {
return readers.get(fieldInfo.number);
}

@Override
public long ramBytesUsed() {
return 0L;
}

@Override
public void checkIntegrity() throws IOException {
CodecUtil.checksumEntireFile(dataIn);
Expand Down

0 comments on commit 79f14b1

Please sign in to comment.