Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Upgrade to lucene 7.5 #181

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions luwak/src/main/java/uk/co/flax/luwak/Monitor.java
Original file line number Diff line number Diff line change
Expand Up @@ -231,7 +231,8 @@ private void prepareQueryCache(boolean storeQueries) throws IOException {
}
seenIds.add(id);

BytesRef serializedMQ = dataValues.mq.get(dataValues.doc);
dataValues.advance(dataValues.doc);
BytesRef serializedMQ = dataValues.mq.binaryValue();
MonitorQuery mq = MonitorQuery.deserialize(serializedMQ);

BytesRef hash = mq.hash();
Expand Down Expand Up @@ -494,7 +495,8 @@ public MonitorQuery getQuery(final String queryId) throws IOException {
throw new IllegalStateException("Cannot call getQuery() as queries are not stored");
final MonitorQuery[] queryHolder = new MonitorQuery[]{ null };
queryIndex.search(new TermQuery(new Term(FIELDS.id, queryId)), (id, query, dataValues) -> {
BytesRef serializedMQ = dataValues.mq.get(dataValues.doc);
dataValues.advance(dataValues.doc);
BytesRef serializedMQ = dataValues.mq.binaryValue();
queryHolder[0] = MonitorQuery.deserialize(serializedMQ);
});
return queryHolder[0];
Expand Down
23 changes: 16 additions & 7 deletions luwak/src/main/java/uk/co/flax/luwak/QueryIndex.java
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ class QueryIndex {
// NB this is not final because it can be replaced by purgeCache()

// package-private for testing
final Map<IndexReader, QueryTermFilter> termFilters = new HashMap<>();
final Map<IndexReader.CacheKey, QueryTermFilter> termFilters = new HashMap<>();

QueryIndex(IndexWriter indexWriter) throws IOException {
this.writer = indexWriter;
Expand All @@ -46,8 +46,8 @@ private class TermsHashBuilder extends SearcherFactory {
public IndexSearcher newSearcher(IndexReader reader, IndexReader previousReader) throws IOException {
IndexSearcher searcher = super.newSearcher(reader, previousReader);
searcher.setQueryCache(null);
termFilters.put(reader, new QueryTermFilter(reader));
reader.addReaderClosedListener(termFilters::remove);
termFilters.put(reader.getReaderCacheHelper().getKey(), new QueryTermFilter(reader));
reader.getReaderCacheHelper().addClosedListener(termFilters::remove);
return searcher;
}
}
Expand Down Expand Up @@ -108,7 +108,7 @@ long search(QueryBuilder queryBuilder, QueryCollector matcher) throws IOExceptio

MonitorQueryCollector collector = new MonitorQueryCollector(queries, matcher);
long buildTime = System.nanoTime();
Query query = queryBuilder.buildQuery(termFilters.get(searcher.getIndexReader()));
Query query = queryBuilder.buildQuery(termFilters.get(searcher.getIndexReader().getReaderCacheHelper().getKey()));
buildTime = System.nanoTime() - buildTime;
searcher.search(query, collector);
return buildTime;
Expand Down Expand Up @@ -222,6 +222,15 @@ static final class DataValues {
public BinaryDocValues mq;
public Scorer scorer;
public int doc;

/**
 * Positions every doc-values source held by this holder on the given document
 * and records it as the current doc, so that subsequent {@code binaryValue()}
 * calls read that document's values.
 *
 * NOTE(review): the boolean returned by each {@code advanceExact} call (which
 * reports whether the document actually has a value) is discarded here —
 * presumably every query document carries hash/id values; confirm, otherwise
 * a later {@code binaryValue()} could read stale data.
 *
 * @param target the document id to advance to
 * @throws IOException if the underlying doc-values reader fails
 */
void advance(int target) throws IOException {
    doc = target;
    hash.advanceExact(target);
    id.advanceExact(target);
    // mq is only populated when queries are stored; skip it otherwise
    BinaryDocValues serialized = mq;
    if (serialized != null) {
        serialized.advanceExact(target);
    }
}
}

/**
Expand All @@ -245,10 +254,10 @@ public void setScorer(Scorer scorer) throws IOException {

@Override
public void collect(int doc) throws IOException {
BytesRef hash = dataValues.hash.get(doc);
BytesRef id = dataValues.id.get(doc);
dataValues.advance(doc);
BytesRef hash = dataValues.hash.binaryValue();
BytesRef id = dataValues.id.binaryValue();
QueryCacheEntry query = queries.get(hash);
dataValues.doc = doc;
matcher.matchQuery(id.utf8ToString(), query, dataValues);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ public Query adjustPresearcherQuery(LeafReader reader, Query presearcherQuery) t

private Query buildFilterClause(LeafReader reader) throws IOException {

Terms terms = reader.fields().terms(field);
Terms terms = reader.terms(field);
if (terms == null)
return null;

Expand Down
Original file line number Diff line number Diff line change
@@ -1,22 +1,21 @@
package uk.co.flax.luwak.presearcher;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.TermsQuery;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermInSetQuery;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash;
import uk.co.flax.luwak.analysis.TermsEnumTokenStream;
import uk.co.flax.luwak.termextractor.querytree.QueryTree;
import uk.co.flax.luwak.termextractor.weights.TermWeightor;
import uk.co.flax.luwak.util.CollectionUtils;

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

/*
* Copyright (c) 2014 Lemur Consulting Ltd.
Expand Down Expand Up @@ -96,27 +95,33 @@ static String field(String field, int pass) {
private class MultipassDocumentQueryBuilder implements DocumentQueryBuilder {

BooleanQuery.Builder[] queries = new BooleanQuery.Builder[passes];
List<List<Term>> terms = new ArrayList<List<Term>>(passes);
Map<String, BytesRefHash> terms = new HashMap<>();

public MultipassDocumentQueryBuilder() {
for (int i = 0; i < queries.length; i++) {
queries[i] = new BooleanQuery.Builder();
terms.add(i, new ArrayList<Term>());
}
}

@Override
public void addTerm(String field, BytesRef term) throws IOException {
for (int i = 0; i < passes; i++) {
terms.get(i).add(new Term(field(field, i), term));
}
BytesRefHash t = terms.computeIfAbsent(field, f -> new BytesRefHash());
t.add(term);
}

@Override
public Query build() {
Map<String, BytesRef[]> collectedTerms = new HashMap<>();
for (String field : terms.keySet()) {
collectedTerms.put(field, CollectionUtils.convertHash(terms.get(field)));
}
BooleanQuery.Builder parent = new BooleanQuery.Builder();
for (int i = 0; i < passes; i++) {
parent.add(new TermsQuery(terms.get(i)), BooleanClause.Occur.MUST);
BooleanQuery.Builder child = new BooleanQuery.Builder();
for (String field : terms.keySet()) {
child.add(new TermInSetQuery(field(field, i), collectedTerms.get(field)), BooleanClause.Occur.SHOULD);
}
parent.add(child.build(), BooleanClause.Occur.MUST);
}
return parent.build();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,21 +16,20 @@
* limitations under the License.
*/

import java.io.IOException;
import java.io.PrintStream;
import java.util.*;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.*;
import org.apache.lucene.queries.TermsQuery;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermInSetQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash;
Expand All @@ -45,6 +44,15 @@
import uk.co.flax.luwak.termextractor.querytree.QueryTreeViewer;
import uk.co.flax.luwak.termextractor.weights.TermWeightor;
import uk.co.flax.luwak.termextractor.weights.TokenLengthNorm;
import uk.co.flax.luwak.util.CollectionUtils;

import java.io.IOException;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
* Presearcher implementation that uses terms extracted from queries to index
Expand Down Expand Up @@ -96,7 +104,9 @@ public TermFilteredPresearcher(PresearcherComponent... components) {
public final Query buildQuery(LeafReader reader, QueryTermFilter queryTermFilter) {
try {
DocumentQueryBuilder queryBuilder = getQueryBuilder();
for (String field : reader.fields()) {
for (FieldInfo fi : reader.getFieldInfos()) {

final String field = fi.name;

TokenStream ts = new TermsEnumTokenStream(reader.terms(field).iterator());
for (PresearcherComponent component : components) {
Expand Down Expand Up @@ -134,16 +144,21 @@ public final Query buildQuery(LeafReader reader, QueryTermFilter queryTermFilter
protected DocumentQueryBuilder getQueryBuilder() {
return new DocumentQueryBuilder() {

List<Term> terms = new ArrayList<>();
Map<String, BytesRefHash> terms = new HashMap<>();

@Override
public void addTerm(String field, BytesRef term) throws IOException {
terms.add(new Term(field, term));
BytesRefHash hash = terms.computeIfAbsent(field, f -> new BytesRefHash());
hash.add(term);
}

@Override
public Query build() {
return new TermsQuery(terms);
BooleanQuery.Builder builder = new BooleanQuery.Builder();
for (String field : terms.keySet()) {
builder.add(new TermInSetQuery(field, CollectionUtils.convertHash(terms.get(field))), BooleanClause.Occur.SHOULD);
}
return builder.build();
}
};
}
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,6 @@ public QueryTree buildTree(QueryAnalyzer builder, TermWeightor weightor, Query q
newFilteringQueryBuilder(BoostedQuery.class, BoostedQuery::getQuery),
newDisjunctionBuilder(DisjunctionMaxQuery.class,
(b, w, q) -> q.getDisjuncts().stream().map(qq -> b.buildTree(qq, w)).collect(Collectors.toList())),
TermsQueryTreeBuilder.INSTANCE,
TermInSetQueryTreeBuilder.INSTANCE,
new QueryTreeBuilder<SpanWithinQuery>(SpanWithinQuery.class) {
@Override
Expand Down
12 changes: 12 additions & 0 deletions luwak/src/main/java/uk/co/flax/luwak/util/CollectionUtils.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
package uk.co.flax.luwak.util;

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash;

import java.util.*;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.TimeUnit;
Expand Down Expand Up @@ -45,6 +48,15 @@ public static <T> List<List<T>> partition(List<T> items, int slices) {
return list;
}

/**
 * Copies the contents of a {@link BytesRefHash} into a new array, one
 * {@link BytesRef} per stored entry, ordered by insertion ord (0..size-1).
 *
 * Each element is backed by its own freshly-allocated {@code BytesRef}, so
 * the returned array remains valid independently of the hash.
 *
 * @param hash the hash whose entries should be extracted; must not be null
 * @return a new array containing every entry of {@code hash}
 */
public static BytesRef[] convertHash(BytesRefHash hash) {
    // Idiomatic Java array declaration (was C-style `BytesRef terms[]`).
    BytesRef[] terms = new BytesRef[hash.size()];
    for (int i = 0; i < terms.length; i++) {
        // BytesRefHash.get fills the supplied scratch ref and returns it;
        // a new scratch per slot keeps the entries independent.
        terms[i] = hash.get(i, new BytesRef());
    }
    return terms;
}

/**
* Drains the queue as {@link BlockingQueue#drainTo(Collection, int)}, but if the requested
* {@code numElements} elements are not available, it will wait for them up to the specified
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,11 +62,16 @@ public Query getWrappedQuery() {
}

@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {

final Weight innerWeight = inner.createWeight(searcher, needsScores);
final Weight innerWeight = inner.createWeight(searcher, needsScores, boost);

return new Weight(ForceNoBulkScoringQuery.this) {
@Override
public boolean isCacheable(LeafReaderContext leafReaderContext) {
return innerWeight.isCacheable(leafReaderContext);
}

@Override
public void extractTerms(Set<Term> set) {
innerWeight.extractTerms(set);
Expand All @@ -77,16 +82,6 @@ public Explanation explain(LeafReaderContext leafReaderContext, int i) throws IO
return innerWeight.explain(leafReaderContext, i);
}

@Override
public float getValueForNormalization() throws IOException {
return innerWeight.getValueForNormalization();
}

@Override
public void normalize(float v, float v1) {
innerWeight.normalize(v, v1);
}

@Override
public Scorer scorer(LeafReaderContext leafReaderContext) throws IOException {
return innerWeight.scorer(leafReaderContext);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,8 +87,8 @@ public int hashCode() {
}

@Override
public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
return new SpanOffsetWeight(searcher, in.createWeight(searcher, needsScores));
public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
return new SpanOffsetWeight(searcher, in.createWeight(searcher, needsScores, boost), boost);
}

/**
Expand All @@ -107,8 +107,8 @@ private class SpanOffsetWeight extends SpanWeight {

private final SpanWeight in;

private SpanOffsetWeight(IndexSearcher searcher, SpanWeight in) throws IOException {
super(SpanOffsetReportingQuery.this, searcher, termContexts(in));
private SpanOffsetWeight(IndexSearcher searcher, SpanWeight in, float boost) throws IOException {
super(SpanOffsetReportingQuery.this, searcher, termContexts(in), boost);
this.in = in;
}

Expand All @@ -126,5 +126,10 @@ public Spans getSpans(LeafReaderContext ctx, Postings requiredPostings) throws I
public void extractTerms(Set<Term> terms) {
in.extractTerms(terms);
}

@Override
public boolean isCacheable(LeafReaderContext leafReaderContext) {
return in.isCacheable(leafReaderContext);
}
}
}
Loading