Skip to content

Commit

Permalink
Merge pull request #85 from wolfgangmm/develop
Browse files Browse the repository at this point in the history
New range index: add support for time and date types, regex matches
  • Loading branch information
adamretter committed Nov 18, 2013
2 parents a392f9a + 86ee2b1 commit eade0cf
Show file tree
Hide file tree
Showing 10 changed files with 442 additions and 10 deletions.
Expand Up @@ -5,16 +5,21 @@
import org.apache.lucene.document.*;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.NumericUtils;
import org.exist.EXistException;
import org.exist.dom.QName;
import org.exist.indexing.lucene.LuceneIndexConfig;
import org.exist.storage.Indexable;
import org.exist.storage.NodePath;
import org.exist.util.ByteConversion;
import org.exist.util.Collations;
import org.exist.util.DatabaseConfigurationException;
import org.exist.util.XMLString;
import org.exist.xquery.XPathException;
import org.exist.xquery.value.*;
import org.w3c.dom.Element;

import javax.xml.datatype.DatatypeConstants;
import javax.xml.datatype.XMLGregorianCalendar;
import java.io.IOException;
import java.util.Map;

Expand Down Expand Up @@ -102,11 +107,25 @@ public Field convertToField(String fieldName, String content) throws IOException
case Type.FLOAT:
float fvalue = Float.parseFloat(content);
return new FloatField(fieldName, fvalue, FloatField.TYPE_NOT_STORED);
case Type.DATE:
DateValue dv = new DateValue(content);
long dl = dateToLong(dv);
return new LongField(fieldName, dl, LongField.TYPE_NOT_STORED);
case Type.TIME:
TimeValue tv = new TimeValue(content);
long tl = timeToLong(tv);
return new LongField(fieldName, tl, LongField.TYPE_NOT_STORED);
case Type.DATE_TIME:
DateTimeValue dtv = new DateTimeValue(content);
String dateStr = dateTimeToString(dtv);
return new TextField(fieldName, dateStr, Field.Store.NO);
default:
return new TextField(fieldName, content, Field.Store.NO);
}
} catch (NumberFormatException e) {
// wrong type: ignore
} catch (XPathException e) {
// wrong type: ignore
}
return null;
}
Expand Down Expand Up @@ -142,11 +161,65 @@ public static BytesRef convertToBytes(AtomicValue content) throws XPathException
bytes = new BytesRef(NumericUtils.BUF_SIZE_INT);
NumericUtils.longToPrefixCoded(iv, 0, bytes);
return bytes;
case Type.DATE:
long dl = dateToLong((DateValue)content);
bytes = new BytesRef(NumericUtils.BUF_SIZE_LONG);
NumericUtils.longToPrefixCoded(dl, 0, bytes);
return bytes;
case Type.TIME:
long tl = timeToLong((TimeValue) content);
bytes = new BytesRef(NumericUtils.BUF_SIZE_LONG);
NumericUtils.longToPrefixCoded(tl, 0, bytes);
return bytes;
case Type.DATE_TIME:
String dt = dateTimeToString((DateTimeValue) content);
return new BytesRef(dt);
default:
return new BytesRef(content.getStringValue());
}
}

/**
 * Packs a date into a single {@code long} so it can be indexed as a numeric field.
 *
 * <p>The date's calendar is normalized first; year, month and day are then combined as
 * {@code (year << 16) + (month << 8) + day}.</p>
 *
 * @param date the date value to encode
 * @return the packed year/month/day value
 */
public static long dateToLong(DateValue date) {
    final XMLGregorianCalendar normalized = date.calendar.normalize();
    final long yearPart = (long) normalized.getYear() << 16;
    final long monthPart = (long) normalized.getMonth() << 8;
    final long dayPart = (long) normalized.getDay();
    return yearPart + monthPart + dayPart;
}

/**
 * Converts a time value into the {@code long} used as its index key.
 *
 * @param time the time value to encode
 * @return whatever {@code time.getTimeInMillis()} reports
 */
public static long timeToLong(TimeValue time) {
    final long millis = time.getTimeInMillis();
    return millis;
}

/**
 * Renders a dateTime as a zero-padded digit string: year (4), month (2), day (2),
 * hour (2), minute (2), second (2), millisecond (3), in that left-to-right order.
 *
 * <p>{@link #formatNumber(int, int, StringBuilder)} prepends to the buffer, so the
 * components are written from least significant (millisecond) to most significant (year).</p>
 *
 * @param dtv the dateTime value to encode; its calendar is normalized before formatting
 * @return the concatenated, fixed-width digit string
 */
public static String dateTimeToString(DateTimeValue dtv) {
    final XMLGregorianCalendar normalized = dtv.calendar.normalize();
    // {value, width} pairs, least significant component first.
    final int[][] components = {
        {normalized.getMillisecond(), 3},
        {normalized.getSecond(), 2},
        {normalized.getMinute(), 2},
        {normalized.getHour(), 2},
        {normalized.getDay(), 2},
        {normalized.getMonth(), 2},
        {normalized.getYear(), 4}
    };
    final StringBuilder out = new StringBuilder();
    for (final int[] component : components) {
        formatNumber(component[0], component[1], out);
    }
    return out.toString();
}

/**
 * Prepends {@code number} to {@code sb} as a zero-padded decimal field.
 *
 * <p>At most {@code digits} low-order decimal digits are written; higher-order digits are
 * dropped. Zero and negative numbers produce a field consisting only of zeros. When
 * {@code digits} is not positive, a positive number is written in full without padding.</p>
 *
 * @param number the value to format
 * @param digits the field width
 * @param sb the buffer that receives the field at its front
 */
public static void formatNumber(int number, int digits, StringBuilder sb) {
    // Collect the field back to front (lowest digit first), then flip it once.
    final StringBuilder field = new StringBuilder();
    int written = 0;
    for (long rest = number; rest > 0; rest /= 10) {
        field.append((char) ('0' + (int) (rest % 10)));
        if (++written == digits) {
            break;
        }
    }
    while (written < digits) {
        field.append('0');
        written++;
    }
    sb.insert(0, field.reverse().toString());
}

/**
 * Creates the text collector for this index configuration.
 *
 * @param path the node path being indexed; not consulted by this implementation
 * @return a new {@code SimpleTextCollector} set up with this configuration's
 *         nested-inclusion, whitespace-treatment and case-sensitivity settings
 */
public TextCollector getCollector(NodePath path) {
    final TextCollector collector =
        new SimpleTextCollector(this, includeNested, wsTreatment, caseSensitive);
    return collector;
}
Expand Down
Expand Up @@ -101,7 +101,10 @@ public Query toQuery(String field, QName qname, AtomicValue content, RangeIndex.
final int type = content.getType();
BytesRef bytes;
if (Type.subTypeOf(type, Type.STRING)) {
BytesRef key = analyzeContent(field, qname, content, docs);
BytesRef key = null;
if (operator != RangeIndex.Operator.MATCH) {
key = analyzeContent(field, qname, content.getStringValue(), docs);
}
WildcardQuery query;
switch (operator) {
case EQ:
Expand All @@ -122,7 +125,9 @@ public Query toQuery(String field, QName qname, AtomicValue content, RangeIndex.
query.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
return query;
case MATCH:
return new RegexpQuery(new Term(field, key));
RegexpQuery regexQuery = new RegexpQuery(new Term(field, content.getStringValue()));
regexQuery.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
return regexQuery;
}
}
if (operator == RangeIndex.Operator.EQ) {
Expand Down Expand Up @@ -161,6 +166,21 @@ public Query toQuery(String field, QName qname, AtomicValue content, RangeIndex.
} else {
return NumericRangeQuery.newFloatRange(field, (float) ((NumericValue) content).getDouble(), null, includeLower, includeUpper);
}
case Type.DATE:
long dl = RangeIndexConfigElement.dateToLong((DateValue) content);
if (operator == RangeIndex.Operator.LT || operator == RangeIndex.Operator.LE) {
return NumericRangeQuery.newLongRange(field, null, dl, includeLower, includeUpper);
} else {
return NumericRangeQuery.newLongRange(field, dl, null, includeLower, includeUpper);
}
case Type.TIME:
long tl = RangeIndexConfigElement.timeToLong((TimeValue) content);
if (operator == RangeIndex.Operator.LT || operator == RangeIndex.Operator.LE) {
return NumericRangeQuery.newLongRange(field, null, tl, includeLower, includeUpper);
} else {
return NumericRangeQuery.newLongRange(field, tl, null, includeLower, includeUpper);
}
case Type.DATE_TIME:
default:
if (operator == RangeIndex.Operator.LT || operator == RangeIndex.Operator.LE) {
return new TermRangeQuery(field, null, RangeIndexConfigElement.convertToBytes(content), includeLower, includeUpper);
Expand Down Expand Up @@ -678,9 +698,8 @@ private List<QName> getDefinedIndexesFor(QName qname, List<QName> indexes) {
return indexes;
}

protected BytesRef analyzeContent(String field, QName qname, AtomicValue content, DocumentSet docs) throws XPathException {
protected BytesRef analyzeContent(String field, QName qname, String data, DocumentSet docs) throws XPathException {
final Analyzer analyzer = getAnalyzer(qname, field, docs);
String data = content.getStringValue();
if (!isCaseSensitive(qname, field, docs)) {
data = data.toLowerCase();
}
Expand Down
Expand Up @@ -126,6 +126,14 @@ public class FieldLookup extends Function implements Optimizable {
new FunctionReturnSequenceType(Type.NODE, Cardinality.ZERO_OR_MORE,
"all nodes from the field set whose node value is equal to the key."),
true
),
new FunctionSignature(
new QName("field-matches", RangeIndexModule.NAMESPACE_URI, RangeIndexModule.PREFIX),
"Used by optimizer to optimize a matches() function call",
PARAMETER_TYPE,
new FunctionReturnSequenceType(Type.NODE, Cardinality.ZERO_OR_MORE,
"all nodes from the field set whose node value matches the regular expression."),
true
)
};

Expand Down
Expand Up @@ -104,6 +104,19 @@ public class Lookup extends Function implements Optimizable {
PARAMETER_TYPE,
new FunctionReturnSequenceType(Type.NODE, Cardinality.ZERO_OR_MORE,
"all nodes from the input node set whose node value is equal to the key.")
),
new FunctionSignature(
new QName("matches", RangeIndexModule.NAMESPACE_URI, RangeIndexModule.PREFIX),
DESCRIPTION,
new SequenceType[] {
new FunctionParameterSequenceType("nodes", Type.NODE, Cardinality.ZERO_OR_MORE,
"The node set to search using a range index which is defined on those nodes"),
new FunctionParameterSequenceType("regex", Type.STRING, Cardinality.ZERO_OR_MORE,
"The regular expression.")
},
new FunctionReturnSequenceType(Type.NODE, Cardinality.ZERO_OR_MORE,
"all nodes from the input node set whose node value matches the regular expression. Regular expression " +
"syntax is limited to what Lucene supports. See http://lucene.apache.org/core/4_5_1/core/org/apache/lucene/util/automaton/RegExp.html")
)
};

Expand Down
Expand Up @@ -44,6 +44,7 @@ public class RangeIndexModule extends AbstractInternalModule {
new FunctionDef(Lookup.signatures[5], Lookup.class),
new FunctionDef(Lookup.signatures[6], Lookup.class),
new FunctionDef(Lookup.signatures[7], Lookup.class),
new FunctionDef(Lookup.signatures[8], Lookup.class),
new FunctionDef(FieldLookup.signatures[0], FieldLookup.class),
new FunctionDef(FieldLookup.signatures[1], FieldLookup.class),
new FunctionDef(FieldLookup.signatures[2], FieldLookup.class),
Expand All @@ -53,6 +54,7 @@ public class RangeIndexModule extends AbstractInternalModule {
new FunctionDef(FieldLookup.signatures[6], FieldLookup.class),
new FunctionDef(FieldLookup.signatures[7], FieldLookup.class),
new FunctionDef(FieldLookup.signatures[8], FieldLookup.class),
new FunctionDef(FieldLookup.signatures[9], FieldLookup.class),
new FunctionDef(Optimize.signature, Optimize.class),
new FunctionDef(IndexKeys.signatures[0], IndexKeys.class)
};
Expand All @@ -67,6 +69,8 @@ public class RangeIndexModule extends AbstractInternalModule {
OPERATOR_MAP.put("starts-with", RangeIndex.Operator.STARTS_WITH);
OPERATOR_MAP.put("ends-with", RangeIndex.Operator.ENDS_WITH);
OPERATOR_MAP.put("contains", RangeIndex.Operator.CONTAINS);
OPERATOR_MAP.put("matches", RangeIndex.Operator.MATCH);

}

public RangeIndexModule(Map<String, List<? extends Object>> parameters) {
Expand Down
Expand Up @@ -108,7 +108,6 @@ public boolean rewriteLocationStep(LocationStep locationStep) throws XPathExcept
private boolean tryRewriteToFields(LocationStep locationStep, RewritableExpression parentExpr, List<Predicate> preds, NodePath contextPath) throws XPathException {
// without context path, we cannot rewrite the entire query
if (contextPath != null) {
RangeIndex.Operator operator = null;
List<Expression> args = null;
SequenceConstructor arg0 = null;
SequenceConstructor arg1 = null;
Expand Down Expand Up @@ -207,13 +206,31 @@ private Lookup rewrite(Expression expression) throws XPathException {
Lookup func = Lookup.create(comparison.getContext(), getOperator(expression));
func.setArguments(eqArgs);
return func;
} else if (expression instanceof InternalFunctionCall) {
InternalFunctionCall fcall = (InternalFunctionCall) expression;
Function function = fcall.getFunction();
if (function instanceof Lookup) {
if (function.isCalledAs("matches")) {
eqArgs.add(function.getArgument(0));
eqArgs.add(function.getArgument(1));
Lookup func = Lookup.create(function.getContext(), RangeIndex.Operator.MATCH);
func.setArguments(eqArgs);
return func;
}
}
}
return null;
}

/**
 * Extracts the expression that supplies the lookup key from a predicate expression.
 *
 * @param expression the predicate expression to inspect
 * @return the right-hand operand of a {@code GeneralComparison}, the second argument of
 *         an {@code InternalFunctionCall} whose target is a {@code Lookup}, or
 *         {@code null} for anything else
 */
private Expression getKeyArg(Expression expression) {
    if (expression instanceof GeneralComparison) {
        final GeneralComparison comparison = (GeneralComparison) expression;
        return comparison.getRight();
    }
    if (!(expression instanceof InternalFunctionCall)) {
        return null;
    }
    final Function target = ((InternalFunctionCall) expression).getFunction();
    return (target instanceof Lookup) ? target.getArgument(1) : null;
}
Expand All @@ -222,6 +239,14 @@ private List<LocationStep> getStepsToOptimize(Expression expr) {
if (expr instanceof GeneralComparison) {
GeneralComparison comparison = (GeneralComparison) expr;
return BasicExpressionVisitor.findLocationSteps(comparison.getLeft());
} else if (expr instanceof InternalFunctionCall) {
InternalFunctionCall fcall = (InternalFunctionCall) expr;
Function function = fcall.getFunction();
if (function instanceof Lookup) {
if (function.isCalledAs("matches")) {
return BasicExpressionVisitor.findLocationSteps(function.getArgument(0));
}
}
}
return null;
}
Expand Down Expand Up @@ -261,6 +286,12 @@ private RangeIndex.Operator getOperator(Expression expr) {
}
break;
}
} else if (expr instanceof InternalFunctionCall) {
InternalFunctionCall fcall = (InternalFunctionCall) expr;
Function function = fcall.getFunction();
if (function instanceof Lookup && function.isCalledAs("matches")) {
operator = RangeIndex.Operator.MATCH;
}
}
return operator;
}
Expand Down
16 changes: 16 additions & 0 deletions extensions/indexes/range/test/src/xquery/optimizer.xql 100755 → 100644
Expand Up @@ -149,6 +149,14 @@ function ot:optimize-contains-string($name as xs:string) {
collection($ot:COLLECTION)//address[contains(name, $name)]
};

(:~
 : Optimizer test for range:matches() on the "name" child of "address".
 :
 : Runs the query with the regular expression "[rR]udi .*" and asserts that the
 : collected statistics contain a stats:index entry of type 'new-range' whose
 : optimization attribute equals 2.
 :
 : @param $name the regular expression passed to range:matches()
 :)
declare
%test:stats
%test:args("[rR]udi .*")
%test:assertXPath("$result//stats:index[@type = 'new-range'][@optimization = 2]")
function ot:optimize-matches-string($name as xs:string) {
collection($ot:COLLECTION)//address[range:matches(name, $name)]
};

declare
%test:stats
%test:args("Rudi Rüssel")
Expand Down Expand Up @@ -324,6 +332,14 @@ function ot:optimize-contains-field($city as xs:string) {
collection($ot:COLLECTION)//address[contains(city, $city)]
};

(:~
 : Optimizer test for range:matches() on the "city" child of "address".
 : NOTE(review): the function name suggests "city" is indexed as a range field;
 : confirm against the collection configuration used by this test suite.
 :
 : Runs the query with the regular expression "[rR]üssel.*" and asserts that the
 : collected statistics contain a stats:index entry of type 'new-range' whose
 : optimization attribute equals 2.
 :
 : @param $city the regular expression passed to range:matches()
 :)
declare
%test:stats
%test:args("[rR]üssel.*")
%test:assertXPath("$result//stats:index[@type = 'new-range'][@optimization = 2]")
function ot:optimize-matches-field($city as xs:string) {
collection($ot:COLLECTION)//address[range:matches(city, $city)]
};

declare
%test:stats
%test:args("Rüsselsheim", "Elefantenweg 67")
Expand Down

0 comments on commit eade0cf

Please sign in to comment.