Skip to content

Commit

Permalink
Make text index query cache a configurable option
Browse files Browse the repository at this point in the history
  • Loading branch information
Siddharth Teotia committed Apr 3, 2020
1 parent 221e73a commit ec311dc
Show file tree
Hide file tree
Showing 4 changed files with 35 additions and 14 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,8 @@ public PhysicalColumnIndexContainer(SegmentDirectory.Reader segmentReader, Colum
_dictionary = null;
_bloomFilterReader = null;
if (loadTextIndex) {
_invertedIndex = new LuceneTextIndexReader(columnName, segmentIndexDir, metadata.getTotalDocs());
_invertedIndex = new LuceneTextIndexReader(columnName, segmentIndexDir, metadata.getTotalDocs(),
indexLoadingConfig.getColumnsWithProperties().get(columnName));
} else {
_invertedIndex = null;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,9 @@ public class IndexLoadingConfig {
private boolean _isDirectRealtimeOffheapAllocation;
private boolean _enableSplitCommitEndWithMetadata;

// Maps each column name to the properties declared in that column's FieldConfig
private Map<String, Map<String, String>> _columnsWithProperties;

public IndexLoadingConfig(@Nonnull InstanceDataManagerConfig instanceDataManagerConfig,
@Nonnull TableConfig tableConfig) {
extractFromInstanceConfig(instanceDataManagerConfig);
Expand Down Expand Up @@ -93,6 +96,7 @@ private void extractFromTableConfig(@Nonnull TableConfig tableConfig) {
_noDictionaryColumns.addAll(noDictionaryColumns);
}

_columnsWithProperties = new HashMap<>();
extractTextIndexColumnsFromTableConfig(tableConfig);

Map<String, String> noDictionaryConfig = indexingConfig.getNoDictionaryConfig();
Expand Down Expand Up @@ -136,11 +140,12 @@ private void extractTextIndexColumnsFromTableConfig(TableConfig tableConfig) {
for (FieldConfig fieldConfig : fieldConfigList) {
String column = fieldConfig.getName();
if (fieldConfig.getIndexType() == FieldConfig.IndexType.TEXT) {
if (fieldConfig.getEncodingType() != FieldConfig.EncodingType.RAW || !_noDictionaryColumns.contains(fieldConfig.getName())) {
if (fieldConfig.getEncodingType() != FieldConfig.EncodingType.RAW || !_noDictionaryColumns.contains(column)) {
throw new UnsupportedOperationException("Text index is currently not supported on dictionary encoded column: " + column);
}
_textIndexColumns.add(fieldConfig.getName());
_textIndexColumns.add(column);
}
_columnsWithProperties.put(column, fieldConfig.getProperties());
}
}
}
Expand Down Expand Up @@ -196,6 +201,11 @@ public Set<String> getInvertedIndexColumns() {
return _invertedIndexColumns;
}

/**
 * Returns the per-column properties map held by this config.
 *
 * <p>Keys are column names; each value is the properties map taken from that column's
 * {@code FieldConfig}. The internal map itself is returned, not a copy.
 *
 * <p>NOTE(review): the backing field is assigned while extracting from the table config;
 * confirm every construction path runs that step so the {@code @Nonnull} contract holds.
 * NOTE(review): a column's value is presumably null when its FieldConfig declares no
 * properties -- verify before dereferencing the inner map.
 *
 * @return the map of column name to that column's FieldConfig properties
 */
@Nonnull
public Map<String, Map<String, String>> getColumnsWithProperties() {
return _columnsWithProperties;
}

/**
* Used in two places:
* (1) In {@link org.apache.pinot.core.segment.index.column.PhysicalColumnIndexContainer}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@
import java.io.File;
import java.io.IOException;
import java.nio.ByteOrder;
import java.util.Map;
import javax.annotation.Nullable;
import org.apache.lucene.analysis.en.EnglishAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
Expand All @@ -36,7 +39,7 @@
import org.apache.pinot.core.segment.index.readers.InvertedIndexReader;
import org.apache.pinot.core.segment.memory.PinotDataBuffer;
import org.apache.pinot.core.segment.store.SegmentDirectoryPaths;
import org.roaringbitmap.IntIterator;
import org.apache.pinot.spi.config.FieldConfig;
import org.roaringbitmap.buffer.MutableRoaringBitmap;
import org.slf4j.LoggerFactory;

Expand Down Expand Up @@ -67,16 +70,20 @@ public class LuceneTextIndexReader implements InvertedIndexReader<MutableRoaring
* @param indexDir segment index directory
* @param numDocs number of documents in the segment
*/
public LuceneTextIndexReader(String column, File indexDir, int numDocs) {
public LuceneTextIndexReader(String column, File indexDir, int numDocs,
@Nullable Map<String, String> textIndexProperties) {
_column = column;
try {
File indexFile = getTextIndexFile(indexDir);
_indexDirectory = FSDirectory.open(indexFile.toPath());
_indexReader = DirectoryReader.open(_indexDirectory);
_indexSearcher = new IndexSearcher(_indexReader);
// Disable Lucene query result cache. While it helps a lot with performance for
// repeated queries, on the downside it cause heap issues.
_indexSearcher.setQueryCache(null);
if (textIndexProperties == null || textIndexProperties.get(FieldConfig.TEXT_INDEX_ENABLE_QUERY_CACHE) == null
|| !textIndexProperties.get(FieldConfig.TEXT_INDEX_ENABLE_QUERY_CACHE).equalsIgnoreCase("true")) {
// Disable the Lucene query result cache unless it is explicitly enabled. While the cache
// helps a lot with performance for repeated queries, on the downside it causes heap issues.
_indexSearcher.setQueryCache(null);
}
// TODO: consider using a threshold of num docs per segment to decide between building
// mapping file upfront on segment load v/s on-the-fly during query processing
_docIdTranslator = new DocIdTranslator(indexDir, _column, numDocs, _indexSearcher);
Expand All @@ -85,8 +92,7 @@ public LuceneTextIndexReader(String column, File indexDir, int numDocs) {
.error("Failed to instantiate Lucene text index reader for column {}, exception {}", column, e.getMessage());
throw new RuntimeException(e);
}
StandardAnalyzer analyzer = new StandardAnalyzer();
_queryParser = new QueryParser(column, analyzer);
_queryParser = new QueryParser(column, new StandardAnalyzer());
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,14 @@ public class FieldConfig extends BaseJsonConfig {
private final IndexType _indexType;
private final Map<String, String> _properties;

public static String BLOOM_FILTER_COLUMN_KEY = "field.config.bloom.filter";
public static String ON_HEAP_DICTIONARY_COLUMN_KEY = "field.config.onheap.dictionary";
public static String TEXT_INDEX_REALTIME_READER_REFRESH_KEY = "field.config.realtime.reader.refresh";
public static String VAR_LENGTH_DICTIONARY_COLUMN_KEY = "field.config.var.length.dictionary";
public static String BLOOM_FILTER_COLUMN_KEY = "bloom.filter";
public static String ON_HEAP_DICTIONARY_COLUMN_KEY = "onheap.dictionary";
public static String VAR_LENGTH_DICTIONARY_COLUMN_KEY = "var.length.dictionary";

public static String TEXT_INDEX_REALTIME_READER_REFRESH_KEY = "text.index.realtime.reader.refresh";
// When this option is enabled, Lucene creates a query result cache.
// The cache improves the performance of repeated queries.
public static String TEXT_INDEX_ENABLE_QUERY_CACHE = "text.index.enable.query.cache";

@JsonCreator
public FieldConfig(@JsonProperty(value = "name", required = true) String name,
Expand Down

0 comments on commit ec311dc

Please sign in to comment.