LUCENE-5666: get solr started

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5666@1594254 13f79535-47bb-0310-9956-ffa450edef68
rmuir committed May 13, 2014
1 parent a2e4ce4 commit 1489085807cb10981a7ea5b5663ada4e3f85953e
Showing with 262 additions and 181 deletions.
  1. +16 −4 lucene/misc/src/java/org/apache/lucene/uninverting/UninvertingReader.java
  2. +10 −0 solr/contrib/analysis-extras/src/java/org/apache/solr/schema/ICUCollationField.java
  3. +1 −1 solr/core/src/java/org/apache/solr/core/SolrCore.java
  4. +5 −2 solr/core/src/java/org/apache/solr/request/DocValuesFacets.java
  5. +2 −160 solr/core/src/java/org/apache/solr/request/SimpleFacets.java
  6. +6 −0 solr/core/src/java/org/apache/solr/schema/AbstractSpatialFieldType.java
  7. +10 −0 solr/core/src/java/org/apache/solr/schema/BinaryField.java
  8. +10 −0 solr/core/src/java/org/apache/solr/schema/BoolField.java
  9. +10 −0 solr/core/src/java/org/apache/solr/schema/CollationField.java
  10. +6 −0 solr/core/src/java/org/apache/solr/schema/CurrencyField.java
  11. +10 −0 solr/core/src/java/org/apache/solr/schema/EnumField.java
  12. +6 −0 solr/core/src/java/org/apache/solr/schema/ExternalFileField.java
  13. +11 −0 solr/core/src/java/org/apache/solr/schema/FieldType.java
  14. +11 −0 solr/core/src/java/org/apache/solr/schema/GeoHashField.java
  15. +20 −0 solr/core/src/java/org/apache/solr/schema/IndexSchema.java
  16. +6 −0 solr/core/src/java/org/apache/solr/schema/LatLonType.java
  17. +6 −0 solr/core/src/java/org/apache/solr/schema/PointType.java
  18. +17 −2 solr/core/src/java/org/apache/solr/schema/PreAnalyzedField.java
  19. +6 −0 solr/core/src/java/org/apache/solr/schema/RandomSortField.java
  20. +10 −0 solr/core/src/java/org/apache/solr/schema/StrField.java
  21. +14 −1 solr/core/src/java/org/apache/solr/schema/TextField.java
  22. +6 −0 solr/core/src/java/org/apache/solr/schema/TrieDateField.java
  23. +34 −0 solr/core/src/java/org/apache/solr/schema/TrieField.java
  24. +20 −2 solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
  25. +1 −1 solr/core/src/java/org/apache/solr/update/SolrIndexSplitter.java
  26. +1 −1 solr/core/src/test-files/solr/collection1/conf/schema.xml
  27. +1 −1 solr/core/src/test/org/apache/solr/core/TestMergePolicyConfig.java
  28. +6 −6 solr/core/src/test/org/apache/solr/search/function/SortByFunctionTest.java
@@ -220,29 +220,37 @@ public FieldInfos getFieldInfos() {
public NumericDocValues getNumericDocValues(String field) throws IOException {
Type v = mapping.get(field);
if (v != null) {
- switch (mapping.get(field)) {
+ switch (v) {
case INTEGER: return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.NUMERIC_UTILS_INT_PARSER, true);
case FLOAT: return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.NUMERIC_UTILS_FLOAT_PARSER, true);
case LONG: return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.NUMERIC_UTILS_LONG_PARSER, true);
case DOUBLE: return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.NUMERIC_UTILS_DOUBLE_PARSER, true);
+ default:
+ throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + v);
}
}
return super.getNumericDocValues(field);
}
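With the Type mapping checked up front, a wrapped reader serves uninverted numerics through the ordinary docvalues API. A minimal caller sketch (not part of this commit; the field name and docID are hypothetical, Lucene 4.x API):

NumericDocValues popularity = wrappedLeafReader.getNumericDocValues("popularity");
long value = popularity.get(docID); // backed by FieldCache when "popularity" is mapped to Type.INTEGER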

@Override
public BinaryDocValues getBinaryDocValues(String field) throws IOException {
- if (mapping.get(field) == Type.BINARY) {
+ Type v = mapping.get(field);
+ if (v == Type.BINARY) {
return FieldCache.DEFAULT.getTerms(in, field, true);
+ } else if (v != null && v != Type.SORTED) {
+ throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + v);
} else {
return in.getBinaryDocValues(field);
}
}

@Override
public SortedDocValues getSortedDocValues(String field) throws IOException {
- if (mapping.get(field) == Type.SORTED) {
+ Type v = mapping.get(field);
+ if (v == Type.SORTED) {
return FieldCache.DEFAULT.getTermsIndex(in, field);
+ } else if (v != null) {
+ throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + v);
} else {
return in.getSortedDocValues(field);
}
@@ -252,7 +260,7 @@ public SortedDocValues getSortedDocValues(String field) throws IOException {
public SortedSetDocValues getSortedSetDocValues(String field) throws IOException {
Type v = mapping.get(field);
if (v != null) {
- switch (mapping.get(field)) {
+ switch (v) {
case SORTED_SET_INTEGER:
case SORTED_SET_FLOAT:
return FieldCache.DEFAULT.getDocTermOrds(in, field, FieldCache.INT32_TERM_PREFIX);
@@ -261,6 +269,10 @@ public SortedSetDocValues getSortedSetDocValues(String field) throws IOException
return FieldCache.DEFAULT.getDocTermOrds(in, field, FieldCache.INT64_TERM_PREFIX);
case SORTED_SET_BINARY:
return FieldCache.DEFAULT.getDocTermOrds(in, field, null);
+ default:
+ if (v != Type.SORTED) {
+ throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + v);
+ }
}
}
return in.getSortedSetDocValues(field);
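Taken together, these four accessors let indexed-only fields masquerade as docvalues, failing fast with IllegalStateException on a mismatched request. A usage sketch, assuming this branch exposes the same static wrap() as the final LUCENE-5666 code (field names hypothetical):

Map<String,Type> mapping = new HashMap<>();
mapping.put("popularity", Type.INTEGER); // single-valued numeric field
mapping.put("category", Type.SORTED); // single-valued string field
mapping.put("tags", Type.SORTED_SET_BINARY); // multi-valued string field
DirectoryReader reader = UninvertingReader.wrap(DirectoryReader.open(dir), mapping);
// reader now answers getNumericDocValues("popularity"), getSortedDocValues("category"),
// and getSortedSetDocValues("tags") via FieldCache uninversion.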
@@ -38,6 +38,7 @@
import org.apache.lucene.search.Query;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TermRangeQuery;
+ import org.apache.lucene.uninverting.UninvertingReader.Type;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Version;
import org.apache.lucene.analysis.util.ResourceLoader;
@@ -224,6 +225,15 @@ public void write(TextResponseWriter writer, String name, StorableField f) throw
public SortField getSortField(SchemaField field, boolean top) {
return getStringSort(field, top);
}

+ @Override
+ public Type getUninversionType(SchemaField sf) {
+ if (sf.multiValued()) {
+ return Type.SORTED_SET_BINARY;
+ } else {
+ return Type.SORTED;
+ }
+ }

@Override
public Analyzer getIndexAnalyzer() {
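This override is the Solr side of the contract: each FieldType declares which uninversion Type can stand in for missing docvalues, with null meaning the field cannot be uninverted. The IndexSchema/SolrIndexSearcher changes listed above presumably assemble the mapping along these lines (a sketch; the exact wiring on this branch is an assumption):

Map<String,Type> mapping = new HashMap<>();
for (SchemaField sf : schema.getFields().values()) {
  if (sf.indexed() && !sf.hasDocValues()) {
    Type t = sf.getType().getUninversionType(sf);
    if (t != null) {
      mapping.put(sf.getName(), t); // eligible for on-the-fly uninversion
    }
  }
}
DirectoryReader searcherReader = UninvertingReader.wrap(rawReader, mapping);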
@@ -1461,7 +1461,7 @@ public UpdateHandler getUpdateHandler() {
if (newestSearcher != null && (nrt || indexDirFile.equals(newIndexDirFile))) {

DirectoryReader newReader;
- DirectoryReader currentReader = newestSearcher.get().getIndexReader();
+ DirectoryReader currentReader = newestSearcher.get().getRawReader();

// SolrCore.verbose("start reopen from",previousSearcher,"writer=",writer);
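The getRawReader() switch matters because the searcher's visible reader is now presumably an uninverting wrapper; a reopen must start from the underlying reader rather than re-wrap a wrapper. The reopen idiom then looks roughly like this (sketch, Lucene 4.x API; the writer variable is hypothetical):

DirectoryReader raw = newestSearcher.get().getRawReader(); // unwrapped reader
DirectoryReader reopened = DirectoryReader.openIfChanged(raw, writer, true); // null if nothing changed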

@@ -60,10 +60,13 @@ private DocValuesFacets() {}
SchemaField schemaField = searcher.getSchema().getField(fieldName);
FieldType ft = schemaField.getType();
NamedList<Integer> res = new NamedList<>();

+ // TODO: remove multiValuedFieldCache(), check dv type / uninversion type?
+ final boolean multiValued = schemaField.multiValued() || ft.multiValuedFieldCache();

final SortedSetDocValues si; // for term lookups only
OrdinalMap ordinalMap = null; // for mapping per-segment ords to global ones
- if (schemaField.multiValued()) {
+ if (multiValued) {
si = searcher.getAtomicReader().getSortedSetDocValues(fieldName);
if (si instanceof MultiSortedSetDocValues) {
ordinalMap = ((MultiSortedSetDocValues)si).mapping;
@@ -126,7 +129,7 @@ private DocValuesFacets() {}
disi = dis.iterator();
}
if (disi != null) {
- if (schemaField.multiValued()) {
+ if (multiValued) {
SortedSetDocValues sub = leaf.reader().getSortedSetDocValues(fieldName);
if (sub == null) {
sub = DocValues.EMPTY_SORTED_SET;
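The widened multiValued flag (now also covering multiValuedFieldCache() types) decides which docvalues view feeds the counts. On the multi-valued path, per-segment counting follows the usual Lucene 4.x SortedSetDocValues iteration, roughly (variable names hypothetical):

sub.setDocument(doc);
long ord;
while ((ord = sub.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
  counts[(int) ord]++; // per-segment ord, mapped to a global ord via the OrdinalMap
}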
@@ -378,18 +378,13 @@ public int getGroupedFacetQueryCount(Query facetQuery) throws IOException {

final boolean multiToken = sf.multiValued() || ft.multiValuedFieldCache();

- if (method == null && ft.getNumericType() != null && !sf.multiValued()) {
+ if (ft.getNumericType() != null && !sf.multiValued()) {
// the per-segment approach is optimal for numeric field types since there
// are no global ords to merge and no need to create an expensive
// top-level reader
method = FacetMethod.FCS;
}

- if (ft.getNumericType() != null && sf.hasDocValues()) {
- // only fcs is able to leverage the numeric field caches
- method = FacetMethod.FCS;
- }
-
if (method == null) {
// TODO: default to per-segment or not?
method = FacetMethod.FC;
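Net effect of this hunk, as a sketch with the surviving branches annotated:

if (ft.getNumericType() != null && !sf.multiValued()) {
  method = FacetMethod.FCS; // always per-segment for single-valued numerics, even if a method was requested
}
if (method == null) {
  method = FacetMethod.FC; // default: top-level docvalues, real or uninverted
}

The separate hasDocValues() branch disappears because FC itself is now docvalues-backed, as the next hunk shows.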
@@ -430,14 +425,7 @@ public int getGroupedFacetQueryCount(Query facetQuery) throws IOException {
}
break;
case FC:
- if (sf.hasDocValues()) {
- counts = DocValuesFacets.getCounts(searcher, base, field, offset,limit, mincount, missing, sort, prefix);
- } else if (multiToken || TrieField.getMainValuePrefix(ft) != null) {
- UnInvertedField uif = UnInvertedField.getUnInvertedField(field, searcher);
- counts = uif.getCounts(searcher, base, offset, limit, mincount,missing,sort,prefix);
- } else {
- counts = getFieldCacheCounts(searcher, base, field, offset,limit, mincount, missing, sort, prefix);
- }
+ counts = DocValuesFacets.getCounts(searcher, base, field, offset,limit, mincount, missing, sort, prefix);
break;
default:
throw new AssertionError();
@@ -622,152 +610,6 @@ public static int getFieldMissingCount(SolrIndexSearcher searcher, DocSet docs,
return docs.andNotSize(hasVal);
}


- /**
- * Use the Lucene FieldCache to get counts for each unique field value in <code>docs</code>.
- * The field must have at most one indexed token per document.
- */
- public static NamedList<Integer> getFieldCacheCounts(SolrIndexSearcher searcher, DocSet docs, String fieldName, int offset, int limit, int mincount, boolean missing, String sort, String prefix) throws IOException {
- // TODO: If the number of terms is high compared to docs.size(), and zeros==false,
- // we should use an alternate strategy to avoid
- // 1) creating another huge int[] for the counts
- // 2) looping over that huge int[] looking for the rare non-zeros.
- //
- // Yet another variation: if docs.size() is small and termvectors are stored,
- // then use them instead of the FieldCache.
- //
-
- // TODO: this function is too big and could use some refactoring, but
- // we also need a facet cache, and refactoring of SimpleFacets instead of
- // trying to pass all the various params around.
-
- FieldType ft = searcher.getSchema().getFieldType(fieldName);
- NamedList<Integer> res = new NamedList<>();
-
- SortedDocValues si = DocValues.getSorted(searcher.getAtomicReader(), fieldName);
-
- final BytesRef br = new BytesRef();
-
- final BytesRef prefixRef;
- if (prefix == null) {
- prefixRef = null;
- } else if (prefix.length()==0) {
- prefix = null;
- prefixRef = null;
- } else {
- prefixRef = new BytesRef(prefix);
- }
-
- int startTermIndex, endTermIndex;
- if (prefix!=null) {
- startTermIndex = si.lookupTerm(prefixRef);
- if (startTermIndex<0) startTermIndex=-startTermIndex-1;
- prefixRef.append(UnicodeUtil.BIG_TERM);
- endTermIndex = si.lookupTerm(prefixRef);
- assert endTermIndex < 0;
- endTermIndex = -endTermIndex-1;
- } else {
- startTermIndex=-1;
- endTermIndex=si.getValueCount();
- }
-
- final int nTerms=endTermIndex-startTermIndex;
- int missingCount = -1;
- final CharsRef charsRef = new CharsRef(10);
- if (nTerms>0 && docs.size() >= mincount) {
-
- // count collection array only needs to be as big as the number of terms we are
- // going to collect counts for.
- final int[] counts = new int[nTerms];
-
- DocIterator iter = docs.iterator();
-
- while (iter.hasNext()) {
- int term = si.getOrd(iter.nextDoc());
- int arrIdx = term-startTermIndex;
- if (arrIdx>=0 && arrIdx<nTerms) counts[arrIdx]++;
- }
-
- if (startTermIndex == -1) {
- missingCount = counts[0];
- }
-
- // IDEA: we could also maintain a count of "other"... everything that fell outside
- // of the top 'N'
-
- int off=offset;
- int lim=limit>=0 ? limit : Integer.MAX_VALUE;
-
- if (sort.equals(FacetParams.FACET_SORT_COUNT) || sort.equals(FacetParams.FACET_SORT_COUNT_LEGACY)) {
- int maxsize = limit>0 ? offset+limit : Integer.MAX_VALUE-1;
- maxsize = Math.min(maxsize, nTerms);
- LongPriorityQueue queue = new LongPriorityQueue(Math.min(maxsize,1000), maxsize, Long.MIN_VALUE);
-
- int min=mincount-1; // the smallest value in the top 'N' values
- for (int i=(startTermIndex==-1)?1:0; i<nTerms; i++) {
- int c = counts[i];
- if (c>min) {
- // NOTE: we use c>min rather than c>=min as an optimization because we are going in
- // index order, so we already know that the keys are ordered. This can be very
- // important if a lot of the counts are repeated (like zero counts would be).
-
- // smaller term numbers sort higher, so subtract the term number instead
- long pair = (((long)c)<<32) + (Integer.MAX_VALUE - i);
- boolean displaced = queue.insert(pair);
- if (displaced) min=(int)(queue.top() >>> 32);
- }
- }
-
- // if we are deep paging, we don't have to order the highest "offset" counts.
- int collectCount = Math.max(0, queue.size() - off);
- assert collectCount <= lim;
-
- // the start and end indexes of our list "sorted" (starting with the highest value)
- int sortedIdxStart = queue.size() - (collectCount - 1);
- int sortedIdxEnd = queue.size() + 1;
- final long[] sorted = queue.sort(collectCount);
-
- for (int i=sortedIdxStart; i<sortedIdxEnd; i++) {
- long pair = sorted[i];
- int c = (int)(pair >>> 32);
- int tnum = Integer.MAX_VALUE - (int)pair;
- si.lookupOrd(startTermIndex+tnum, br);
- ft.indexedToReadable(br, charsRef);
- res.add(charsRef.toString(), c);
- }
-
- } else {
- // add results in index order
- int i=(startTermIndex==-1)?1:0;
- if (mincount<=0) {
- // if mincount<=0, then we won't discard any terms and we know exactly
- // where to start.
- i+=off;
- off=0;
- }
-
- for (; i<nTerms; i++) {
- int c = counts[i];
- if (c<mincount || --off>=0) continue;
- if (--lim<0) break;
- si.lookupOrd(startTermIndex+i, br);
- ft.indexedToReadable(br, charsRef);
- res.add(charsRef.toString(), c);
- }
- }
- }
-
- if (missing) {
- if (missingCount < 0) {
- missingCount = getFieldMissingCount(searcher,docs,fieldName);
- }
- res.add(null, missingCount);
- }
-
- return res;
- }
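Worth noting before it disappears: the deleted count-sorted path packed (count, term number) into one long so a single LongPriorityQueue could order by count and break ties toward smaller term numbers. A standalone worked example of that encoding:

int c = 5, i = 3; // count 5 for term number 3
long pair = (((long) c) << 32) + (Integer.MAX_VALUE - i);
int count = (int) (pair >>> 32); // 5 again
int tnum = Integer.MAX_VALUE - (int) pair; // 3 again
// For equal counts a smaller i yields larger low bits, so that term wins the
// tie and results come back in index order among equal counts.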


/**
* Returns a list of terms in the specified field along with the
* corresponding count of documents in the set that match that constraint.
@@ -40,6 +40,7 @@
import org.apache.lucene.spatial.query.SpatialArgs;
import org.apache.lucene.spatial.query.SpatialArgsParser;
import org.apache.lucene.spatial.query.SpatialOperation;
+ import org.apache.lucene.uninverting.UninvertingReader.Type;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.response.TextResponseWriter;
@@ -128,6 +129,11 @@ public final Field createField(SchemaField field, Object val, float boost) {
throw new IllegalStateException("instead call createFields() because isPolyField() is true");
}

+ @Override
+ public Type getUninversionType(SchemaField sf) {
+ return null;
+ }

@Override
public List<StorableField> createFields(SchemaField field, Object val, float boost) {
String shapeStr = null;
@@ -23,6 +23,7 @@
import org.apache.lucene.document.Field;
import org.apache.lucene.index.StorableField;
import org.apache.lucene.search.SortField;
+ import org.apache.lucene.uninverting.UninvertingReader.Type;
import org.apache.lucene.util.BytesRef;
import org.apache.solr.common.util.Base64;
import org.apache.solr.response.TextResponseWriter;
@@ -44,6 +45,15 @@ public SortField getSortField(SchemaField field, boolean top) {
throw new RuntimeException("Cannot sort on a Binary field");
}

+ @Override
+ public Type getUninversionType(SchemaField sf) {
+ // TODO: maybe just return null?
+ if (sf.multiValued()) {
+ return Type.SORTED_SET_BINARY;
+ } else {
+ return Type.BINARY;
+ }
+ }

@Override
public String toExternal(StorableField f) {
@@ -34,6 +34,7 @@
import org.apache.lucene.queries.function.docvalues.BoolDocValues;
import org.apache.lucene.queries.function.valuesource.OrdFieldSource;
import org.apache.lucene.search.SortField;
+ import org.apache.lucene.uninverting.UninvertingReader.Type;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.mutable.MutableValue;
@@ -51,6 +52,15 @@ public SortField getSortField(SchemaField field,boolean reverse) {
return getStringSort(field,reverse);
}

+ @Override
+ public Type getUninversionType(SchemaField sf) {
+ if (sf.multiValued()) {
+ return Type.SORTED_SET_BINARY;
+ } else {
+ return Type.SORTED;
+ }
+ }

@Override
public ValueSource getValueSource(SchemaField field, QParser qparser) {
field.checkFieldCacheSource(qparser);
@@ -42,6 +42,7 @@
import org.apache.lucene.search.Query;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TermRangeQuery;
+ import org.apache.lucene.uninverting.UninvertingReader.Type;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Version;
import org.apache.lucene.analysis.util.ResourceLoader;
@@ -199,6 +200,15 @@ public void write(TextResponseWriter writer, String name, StorableField f) throw
public SortField getSortField(SchemaField field, boolean top) {
return getStringSort(field, top);
}

+ @Override
+ public Type getUninversionType(SchemaField sf) {
+ if (sf.multiValued()) {
+ return Type.SORTED_SET_BINARY;
+ } else {
+ return Type.SORTED;
+ }
+ }

@Override
public Analyzer getIndexAnalyzer() {
