Skip to content
Browse files

Add support for rewriting calls to starts-with, ends-with, contains.

Basic collation support.
  • Loading branch information...
1 parent ee700c9 commit 6c04b999938b3a4a628d8d8329dcdd514e0d372e @wolfgangmm wolfgangmm committed Aug 9, 2013
View
1 extensions/indexes/lucene/src/org/exist/indexing/lucene/XMLToQuery.java
@@ -283,7 +283,6 @@ private String getTerm(String field, String text, Analyzer analyzer) throws XPat
}
private Query wildcardQuery(String field, Element node, Analyzer analyzer, Properties options) throws XPathException {
- String termStr = getTerm(field, getText(node), analyzer);
WildcardQuery query = new WildcardQuery(new Term(field, getText(node)));
setRewriteMethod(query, node, options);
return query;
View
10 extensions/indexes/range/src/org/exist/indexing/range/ComplexRangeIndexConfigElement.java
@@ -1,6 +1,7 @@
package org.exist.indexing.range;
import org.apache.log4j.Logger;
+import org.apache.lucene.analysis.Analyzer;
import org.exist.dom.QName;
import org.exist.storage.NodePath;
import org.exist.util.DatabaseConfigurationException;
@@ -71,6 +72,15 @@ public TextCollector getCollector() {
return new ComplexTextCollector(this);
}
+ /**
+  * Look up the analyzer configured for one of this element's named fields.
+  *
+  * @param fieldName name under which the field is registered in {@code fields}
+  * @return the analyzer of the matching field configuration (which may itself
+  *         be {@code null}), or {@code null} when no field has that name
+  */
+ @Override
+ public Analyzer getAnalyzer(String fieldName) {
+     final RangeIndexConfigField config = fields.get(fieldName);
+     return config == null ? null : config.getAnalyzer();
+ }
+
public RangeIndexConfigField getField(NodePath path) {
for (RangeIndexConfigField field: fields.values()) {
if (field.match(path))
View
11 extensions/indexes/range/src/org/exist/indexing/range/RangeIndex.java
@@ -13,6 +13,17 @@
public final static String ID = RangeIndex.class.getName();
+ /**
+ * Comparison operators understood by the range index.
+ *
+ * GT, LT, EQ, GE and LE are value comparisons. STARTS_WITH, ENDS_WITH and
+ * CONTAINS are substring matches on string keys, which
+ * RangeIndexConfigElement.toQuery turns into a prefix query or a wildcard
+ * query with a leading and/or trailing "*".
+ */
+ public enum Operator {
+ GT,
+ LT,
+ EQ,
+ GE,
+ LE,
+ ENDS_WITH,
+ STARTS_WITH,
+ CONTAINS
+ };
+
private static final String DIR_NAME = "range";
private Analyzer defaultAnalyzer = new KeywordAnalyzer();
View
4 extensions/indexes/range/src/org/exist/indexing/range/RangeIndexConfig.java
@@ -3,13 +3,17 @@
import org.apache.log4j.Logger;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
+import org.apache.lucene.collation.CollationKeyAnalyzer;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.LongField;
import org.apache.lucene.document.TextField;
+import org.apache.lucene.util.Version;
import org.exist.dom.NodeListImpl;
import org.exist.dom.QName;
import org.exist.storage.NodePath;
+import org.exist.util.Collations;
import org.exist.util.DatabaseConfigurationException;
+import org.exist.xquery.XPathException;
import org.exist.xquery.value.Type;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
View
85 extensions/indexes/range/src/org/exist/indexing/range/RangeIndexConfigElement.java
@@ -1,30 +1,48 @@
package org.exist.indexing.range;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.core.KeywordAnalyzer;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
+import org.apache.lucene.collation.CollationKeyAnalyzer;
import org.apache.lucene.document.*;
import org.apache.lucene.index.Term;
-import org.apache.lucene.search.NumericRangeQuery;
-import org.apache.lucene.search.Query;
-import org.apache.lucene.search.TermQuery;
-import org.apache.lucene.search.TermRangeQuery;
+import org.apache.lucene.search.*;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.NumericUtils;
+import org.apache.lucene.util.Version;
+import org.exist.dom.DocumentSet;
import org.exist.dom.QName;
import org.exist.indexing.lucene.LuceneIndexConfig;
import org.exist.storage.NodePath;
+import org.exist.util.Collations;
import org.exist.util.DatabaseConfigurationException;
import org.exist.xquery.Constants;
import org.exist.xquery.XPathException;
import org.exist.xquery.value.*;
import org.w3c.dom.Element;
+import java.io.IOException;
+import java.io.StringReader;
import java.util.Map;
public class RangeIndexConfigElement {
+ /**
+ * Lucene field type for range index content: indexed and tokenized, but not
+ * stored, with norms omitted and no term vectors.
+ * NOTE(review): no use of this constant is visible in this change - confirm
+ * where it is consumed.
+ */
+ public static final org.apache.lucene.document.FieldType TYPE_CONTENT = new org.apache.lucene.document.FieldType();
+ static {
+ TYPE_CONTENT.setIndexed(true);
+ TYPE_CONTENT.setStored(false);
+ TYPE_CONTENT.setOmitNorms(true);
+ TYPE_CONTENT.setStoreTermVectors(false);
+ TYPE_CONTENT.setTokenized(true);
+ }
+
protected NodePath path = null;
private int type = Type.STRING;
private RangeIndexConfigElement nextConfig = null;
private boolean isQNameIndex = false;
+ protected Analyzer analyzer = null;
public RangeIndexConfigElement() {
}
@@ -52,10 +70,19 @@ public RangeIndexConfigElement(Element node, Map<String, String> namespaces) thr
throw new DatabaseConfigurationException("Invalid type declared for range index on " + match + ": " + typeStr);
}
}
+ String collation = node.getAttribute("collation");
+ if (collation != null && collation.length() > 0) {
+ try {
+ analyzer = new CollationKeyAnalyzer(Version.LUCENE_43, Collations.getCollationFromURI(null, collation));
+ } catch (XPathException e) {
+ throw new DatabaseConfigurationException(e.getMessage(), e);
+ }
+ }
}
- public Field convertToField(String fieldName, String content) {
+ public Field convertToField(String fieldName, String content) throws IOException {
int fieldType = getType(fieldName);
+ Analyzer analyzer = getAnalyzer(fieldName);
try {
switch (fieldType) {
case Type.INTEGER:
@@ -78,6 +105,10 @@ public Field convertToField(String fieldName, String content) {
return new FloatField(fieldName, fvalue, FloatField.TYPE_NOT_STORED);
default:
// default: treat as text string
+ if (analyzer != null) {
+ TokenStream stream = analyzer.tokenStream(fieldName, new StringReader(content));
+ return new TextField(fieldName, stream);
+ }
return new TextField(fieldName, content, Field.Store.NO);
}
} catch (NumberFormatException e) {
@@ -86,18 +117,38 @@ public Field convertToField(String fieldName, String content) {
return null;
}
- public static Query toQuery(String field, AtomicValue content, int operator) throws XPathException {
- if (operator == Constants.EQ) {
+ public static Query toQuery(String field, AtomicValue content, RangeIndex.Operator operator,
+ DocumentSet docs, RangeIndexWorker worker) throws XPathException {
+ final int type = content.getType();
+ BytesRef bytes;
+ if (Type.subTypeOf(type, Type.STRING)) {
+ BytesRef key = worker.analyzeContent(field, content, docs);
+ switch (operator) {
+ case EQ:
+ return new TermQuery(new Term(field, key));
+ case STARTS_WITH:
+ return new PrefixQuery(new Term(field, key));
+ case ENDS_WITH:
+ bytes = new BytesRef("*");
+ bytes.append(key);
+ return new WildcardQuery(new Term(field, bytes));
+ case CONTAINS:
+ bytes = new BytesRef("*");
+ bytes.append(key);
+ bytes.append(new BytesRef("*"));
+ return new WildcardQuery(new Term(field, bytes));
+ }
+ }
+ if (operator == RangeIndex.Operator.EQ) {
return new TermQuery(new Term(field, convertToBytes(content)));
}
- final int type = content.getType();
- final boolean includeUpper = operator == Constants.LTEQ;
- final boolean includeLower = operator == Constants.GTEQ;
+ final boolean includeUpper = operator == RangeIndex.Operator.LE;
+ final boolean includeLower = operator == RangeIndex.Operator.GE;
switch (type) {
case Type.INTEGER:
case Type.LONG:
case Type.UNSIGNED_LONG:
- if (operator == Constants.LT || operator == Constants.LTEQ) {
+ if (operator == RangeIndex.Operator.LT || operator == RangeIndex.Operator.LE) {
return NumericRangeQuery.newLongRange(field, null, ((NumericValue)content).getLong(), includeLower, includeUpper);
} else {
return NumericRangeQuery.newLongRange(field, ((NumericValue)content).getLong(), null, includeLower, includeUpper);
@@ -106,26 +157,26 @@ public static Query toQuery(String field, AtomicValue content, int operator) thr
case Type.UNSIGNED_INT:
case Type.SHORT:
case Type.UNSIGNED_SHORT:
- if (operator == Constants.LT || operator == Constants.LTEQ) {
+ if (operator == RangeIndex.Operator.LT || operator == RangeIndex.Operator.LE) {
return NumericRangeQuery.newIntRange(field, null, ((NumericValue) content).getInt(), includeLower, includeUpper);
} else {
return NumericRangeQuery.newIntRange(field, ((NumericValue) content).getInt(), null, includeLower, includeUpper);
}
case Type.DECIMAL:
case Type.DOUBLE:
- if (operator == Constants.LT || operator == Constants.LTEQ) {
+ if (operator == RangeIndex.Operator.LT || operator == RangeIndex.Operator.LE) {
return NumericRangeQuery.newDoubleRange(field, null, ((NumericValue) content).getDouble(), includeLower, includeUpper);
} else {
return NumericRangeQuery.newDoubleRange(field, ((NumericValue) content).getDouble(), null, includeLower, includeUpper);
}
case Type.FLOAT:
- if (operator == Constants.LT || operator == Constants.LTEQ) {
+ if (operator == RangeIndex.Operator.LT || operator == RangeIndex.Operator.LE) {
return NumericRangeQuery.newFloatRange(field, null, (float) ((NumericValue) content).getDouble(), includeLower, includeUpper);
} else {
return NumericRangeQuery.newFloatRange(field, (float) ((NumericValue) content).getDouble(), null, includeLower, includeUpper);
}
default:
- if (operator == Constants.LT || operator == Constants.LTEQ) {
+ if (operator == RangeIndex.Operator.LT || operator == RangeIndex.Operator.LE) {
return new TermRangeQuery(field, null, convertToBytes(content), includeLower, includeUpper);
} else {
return new TermRangeQuery(field, convertToBytes(content), null, includeLower, includeUpper);
@@ -209,6 +260,10 @@ public TextCollector getCollector() {
return new SimpleTextCollector();
}
+ /**
+ * Return the analyzer configured for this index definition.
+ *
+ * The {@code field} argument is not consulted here; the same analyzer is
+ * returned for every field name.
+ *
+ * @param field name of the field being indexed or queried (ignored)
+ * @return the analyzer created from the "collation" attribute, or
+ *         {@code null} when no collation was configured
+ */
+ public Analyzer getAnalyzer(String field) {
+ return analyzer;
+ }
+
public boolean isComplex() {
return false;
}
View
17 extensions/indexes/range/src/org/exist/indexing/range/RangeIndexConfigField.java
@@ -1,6 +1,10 @@
package org.exist.indexing.range;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.collation.CollationKeyAnalyzer;
+import org.apache.lucene.util.Version;
import org.exist.storage.NodePath;
+import org.exist.util.Collations;
import org.exist.util.DatabaseConfigurationException;
import org.exist.xquery.XPathException;
import org.exist.xquery.value.Type;
@@ -13,6 +17,7 @@
private String name;
private NodePath path = null;
private int type = Type.STRING;
+ private Analyzer analyzer = null;
public RangeIndexConfigField(NodePath parentPath, Element elem, Map<String, String> namespaces) throws DatabaseConfigurationException {
name = elem.getAttribute("name");
@@ -39,6 +44,14 @@ public RangeIndexConfigField(NodePath parentPath, Element elem, Map<String, Stri
throw new DatabaseConfigurationException("Invalid type declared for range index on " + match + ": " + typeStr);
}
}
+ String collation = elem.getAttribute("collation");
+ if (collation != null && collation.length() > 0) {
+ try {
+ analyzer = new CollationKeyAnalyzer(Version.LUCENE_43, Collations.getCollationFromURI(null, collation));
+ } catch (XPathException e) {
+ throw new DatabaseConfigurationException(e.getMessage(), e);
+ }
+ }
}
public String getName() {
@@ -53,6 +66,10 @@ public int getType() {
return type;
}
+ /**
+ * Return the analyzer configured for this field.
+ *
+ * @return the analyzer created from the field's "collation" attribute, or
+ *         {@code null} when the attribute was absent or empty
+ */
+ public Analyzer getAnalyzer() {
+ return analyzer;
+ }
+
public boolean match(NodePath other) {
return path.match(other);
}
View
40 extensions/indexes/range/src/org/exist/indexing/range/RangeIndexWorker.java
@@ -2,6 +2,10 @@
import org.apache.log4j.Logger;
import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
+import org.apache.lucene.collation.tokenattributes.CollatedTermAttributeImpl;
import org.apache.lucene.document.*;
import org.apache.lucene.index.*;
import org.apache.lucene.queries.TermsFilter;
@@ -32,6 +36,7 @@
import org.w3c.dom.NodeList;
import java.io.IOException;
+import java.io.StringReader;
import java.util.*;
public class RangeIndexWorker implements OrderedValuesIndex, QNamedKeysIndex {
@@ -342,7 +347,7 @@ private void write() {
}
}
- public NodeSet query(int contextId, DocumentSet docs, NodeSet contextSet, List<QName> qnames, AtomicValue[] keys, int operator, int axis) throws IOException, XPathException {
+ public NodeSet query(int contextId, DocumentSet docs, NodeSet contextSet, List<QName> qnames, AtomicValue[] keys, RangeIndex.Operator operator, int axis) throws IOException, XPathException {
qnames = getDefinedIndexes(qnames);
NodeSet resultSet = NodeSet.EMPTY_SET;
IndexSearcher searcher = null;
@@ -354,11 +359,11 @@ public NodeSet query(int contextId, DocumentSet docs, NodeSet contextSet, List<Q
if (keys.length > 1) {
BooleanQuery bool = new BooleanQuery();
for (AtomicValue key: keys) {
- bool.add(RangeIndexConfigElement.toQuery(field, key, operator), BooleanClause.Occur.SHOULD);
+ bool.add(RangeIndexConfigElement.toQuery(field, key, operator, docs, this), BooleanClause.Occur.SHOULD);
}
query = bool;
} else {
- query = RangeIndexConfigElement.toQuery(field, keys[0], operator);
+ query = RangeIndexConfigElement.toQuery(field, keys[0], operator, docs, this);
}
resultSet = doQuery(contextId, docs, contextSet, axis, searcher, qname, query, null);
@@ -369,7 +374,7 @@ public NodeSet query(int contextId, DocumentSet docs, NodeSet contextSet, List<Q
return resultSet;
}
- public NodeSet queryField(int contextId, DocumentSet docs, NodeSet contextSet, Sequence fields, Sequence[] keys, int operator, int axis) throws IOException, XPathException {
+ public NodeSet queryField(int contextId, DocumentSet docs, NodeSet contextSet, Sequence fields, Sequence[] keys, RangeIndex.Operator operator, int axis) throws IOException, XPathException {
NodeSet resultSet = NodeSet.EMPTY_SET;
IndexSearcher searcher = null;
try {
@@ -390,12 +395,12 @@ public NodeSet queryField(int contextId, DocumentSet docs, NodeSet contextSet, S
bool.setMinimumNumberShouldMatch(1);
for (SequenceIterator ki = keys[j].iterate(); ki.hasNext(); ) {
Item key = ki.nextItem();
- Query q = RangeIndexConfigElement.toQuery(field, key.atomize(), operator);
+ Query q = RangeIndexConfigElement.toQuery(field, key.atomize(), operator, docs, this);
bool.add(q, BooleanClause.Occur.SHOULD);
}
query.add(bool, BooleanClause.Occur.MUST);
} else {
- Query q = RangeIndexConfigElement.toQuery(field, keys[j].itemAt(0).atomize(), operator);
+ Query q = RangeIndexConfigElement.toQuery(field, keys[j].itemAt(0).atomize(), operator, docs, this);
query.add(q, BooleanClause.Occur.MUST);
}
}
@@ -597,11 +602,32 @@ public Status needsField(FieldInfo fieldInfo) throws IOException {
return indexes;
}
+ /**
+  * Run a query key through the analyzer selected for the given documents and
+  * return the bytes of the first token it produces.
+  *
+  * @param field name of the index field the key will be matched against
+  * @param content the query key; its string value is what gets analyzed
+  * @param docs documents being queried, used to pick the analyzer
+  * @return a private copy of the first token's bytes, or {@code null} when
+  *         the analyzer emits no token for the key
+  * @throws XPathException if the analyzer fails with an I/O error
+  */
+ protected BytesRef analyzeContent(String field, AtomicValue content, DocumentSet docs) throws XPathException {
+     try {
+         TokenStream stream = getAnalyzer(docs).tokenStream(field, new StringReader(content.getStringValue()));
+         TermToBytesRefAttribute termAttr = stream.addAttribute(TermToBytesRefAttribute.class);
+         BytesRef token = null;
+         try {
+             stream.reset();
+             if (stream.incrementToken()) {
+                 termAttr.fillBytesRef();
+                 // The attribute hands out its own internal BytesRef, which a later
+                 // analysis pass may overwrite. Callers analyze several keys in a
+                 // row and keep each result inside a Term, so return a copy.
+                 token = BytesRef.deepCopyOf(termAttr.getBytesRef());
+             }
+             stream.end();
+         } finally {
+             // always release the stream, even when analysis fails half-way
+             stream.close();
+         }
+         return token;
+     } catch (IOException e) {
+         throw new XPathException("Error analyzing the query string: " + e.getMessage(), e);
+     }
+ }
+
/**
* Return the analyzer to be used for the given field or qname. Either field
* or qname should be specified.
*/
- private Analyzer getAnalyzer(DBBroker broker, DocumentSet docs) {
+ private Analyzer getAnalyzer(DocumentSet docs) {
for (Iterator<Collection> i = docs.getCollectionIterator(); i.hasNext(); ) {
Collection collection = i.next();
IndexSpec idxConf = collection.getIndexConfiguration(broker);
View
61 extensions/indexes/range/src/org/exist/xquery/modules/range/FieldLookup.java
@@ -66,24 +66,57 @@
new FunctionReturnSequenceType(Type.NODE, Cardinality.ZERO_OR_MORE,
"all nodes from the field set whose node value is equal to the key."),
true
+ ),
+ new FunctionSignature(
+ new QName("field-starts-with", RangeIndexModule.NAMESPACE_URI, RangeIndexModule.PREFIX),
+ "",
+ PARAMETER_TYPE,
+ new FunctionReturnSequenceType(Type.NODE, Cardinality.ZERO_OR_MORE,
+ "all nodes from the field set whose node value is equal to the key."),
+ true
+ ),
+ new FunctionSignature(
+ new QName("field-ends-with", RangeIndexModule.NAMESPACE_URI, RangeIndexModule.PREFIX),
+ "",
+ PARAMETER_TYPE,
+ new FunctionReturnSequenceType(Type.NODE, Cardinality.ZERO_OR_MORE,
+ "all nodes from the field set whose node value is equal to the key."),
+ true
+ ),
+ new FunctionSignature(
+ new QName("field-contains", RangeIndexModule.NAMESPACE_URI, RangeIndexModule.PREFIX),
+ "",
+ PARAMETER_TYPE,
+ new FunctionReturnSequenceType(Type.NODE, Cardinality.ZERO_OR_MORE,
+ "all nodes from the field set whose node value is equal to the key."),
+ true
)
};
- public static FieldLookup create(XQueryContext context, int operator) {
+ public static FieldLookup create(XQueryContext context, RangeIndex.Operator operator) {
FunctionSignature signature;
switch (operator) {
- case Constants.GT:
+ case GT:
signature = signatures[1];
break;
- case Constants.LT:
+ case LT:
signature = signatures[2];
break;
- case Constants.LTEQ:
+ case LE:
signature = signatures[3];
break;
- case Constants.GTEQ:
+ case GE:
signature = signatures[4];
break;
+ case STARTS_WITH:
+ signature = signatures[5];
+ break;
+ case ENDS_WITH:
+ signature = signatures[6];
+ break;
+ case CONTAINS:
+ signature = signatures[7];
+ break;
default:
signature = signatures[0];
break;
@@ -140,7 +173,7 @@ public NodeSet preSelect(Sequence contextSequence, boolean useContext) throws XP
keys[i - 1] = getArgument(i).eval(contextSequence);
}
DocumentSet docs = contextSequence.getDocumentSet();
- final int operator = getOperator();
+ final RangeIndex.Operator operator = getOperator();
RangeIndexWorker index = (RangeIndexWorker) context.getBroker().getIndexController().getWorkerByIndexId(RangeIndex.ID);
@@ -188,7 +221,7 @@ public Sequence eval(Sequence contextSequence, Item contextItem) throws XPathExc
for (int i = 1; i < getArgumentCount(); i++) {
keys[i - 1] = getArgument(i).eval(contextSequence);
}
- final int operator = getOperator();
+ final RangeIndex.Operator operator = getOperator();
RangeIndexWorker index = (RangeIndexWorker) context.getBroker().getIndexController().getWorkerByIndexId(RangeIndex.ID);
@@ -210,19 +243,9 @@ public Sequence eval(Sequence contextSequence, Item contextItem) throws XPathExc
return result;
}
- protected int getOperator() {
- int operator = Constants.EQ;
+ private RangeIndex.Operator getOperator() {
final String calledAs = getSignature().getName().getLocalName();
- if ("field-gt".equals(calledAs)) {
- operator = Constants.GT;
- } else if ("field-ge".equals(calledAs)) {
- operator = Constants.GTEQ;
- } else if ("field-lt".equals(calledAs)) {
- operator = Constants.LT;
- } else if ("field-le".equals(calledAs)) {
- operator = Constants.LTEQ;
- }
- return operator;
+ return RangeIndexModule.OPERATOR_MAP.get(calledAs.substring("field-".length()));
}
@Override
View
58 extensions/indexes/range/src/org/exist/xquery/modules/range/Lookup.java
@@ -59,24 +59,54 @@
PARAMETER_TYPE,
new FunctionReturnSequenceType(Type.NODE, Cardinality.ZERO_OR_MORE,
"all nodes from the input node set whose node value is equal to the key.")
+ ),
+ new FunctionSignature(
+ new QName("starts-with", RangeIndexModule.NAMESPACE_URI, RangeIndexModule.PREFIX),
+ "",
+ PARAMETER_TYPE,
+ new FunctionReturnSequenceType(Type.NODE, Cardinality.ZERO_OR_MORE,
+ "all nodes from the input node set whose node value is equal to the key.")
+ ),
+ new FunctionSignature(
+ new QName("ends-with", RangeIndexModule.NAMESPACE_URI, RangeIndexModule.PREFIX),
+ "",
+ PARAMETER_TYPE,
+ new FunctionReturnSequenceType(Type.NODE, Cardinality.ZERO_OR_MORE,
+ "all nodes from the input node set whose node value is equal to the key.")
+ ),
+ new FunctionSignature(
+ new QName("contains", RangeIndexModule.NAMESPACE_URI, RangeIndexModule.PREFIX),
+ "",
+ PARAMETER_TYPE,
+ new FunctionReturnSequenceType(Type.NODE, Cardinality.ZERO_OR_MORE,
+ "all nodes from the input node set whose node value is equal to the key.")
)
};
- public static Lookup create(XQueryContext context, int operator) {
+ public static Lookup create(XQueryContext context, RangeIndex.Operator operator) {
FunctionSignature signature;
switch (operator) {
- case Constants.GT:
+ case GT:
signature = signatures[1];
break;
- case Constants.LT:
+ case LT:
signature = signatures[2];
break;
- case Constants.LTEQ:
+ case LE:
signature = signatures[3];
break;
- case Constants.GTEQ:
+ case GE:
signature = signatures[4];
break;
+ case STARTS_WITH:
+ signature = signatures[5];
+ break;
+ case ENDS_WITH:
+ signature = signatures[6];
+ break;
+ case CONTAINS:
+ signature = signatures[7];
+ break;
default:
signature = signatures[0];
break;
@@ -183,7 +213,7 @@ public NodeSet preSelect(Sequence contextSequence, boolean useContext) throws XP
qnames.add(contextQName);
}
- final int operator = getOperator();
+ final RangeIndex.Operator operator = getOperator();
try {
preselectResult = index.query(getExpressionId(), docs, useContext ? contextSequence.toNodeSet() : null, qnames, keys, operator, NodeSet.DESCENDANT);
@@ -201,19 +231,9 @@ public NodeSet preSelect(Sequence contextSequence, boolean useContext) throws XP
return preselectResult;
}
- private int getOperator() {
- int operator = Constants.EQ;
+ private RangeIndex.Operator getOperator() {
final String calledAs = getSignature().getName().getLocalName();
- if ("gt".equals(calledAs)) {
- operator = Constants.GT;
- } else if ("ge".equals(calledAs)) {
- operator = Constants.GTEQ;
- } else if ("lt".equals(calledAs)) {
- operator = Constants.LT;
- } else if ("le".equals(calledAs)) {
- operator = Constants.LTEQ;
- }
- return operator;
+ return RangeIndexModule.OPERATOR_MAP.get(calledAs);
}
private AtomicValue[] getKeys(Sequence contextSequence) throws XPathException {
@@ -255,7 +275,7 @@ public Sequence eval(Sequence contextSequence, Item contextItem) throws XPathExc
qnames = new ArrayList<QName>(1);
qnames.add(contextQName);
}
- final int operator = getOperator();
+ final RangeIndex.Operator operator = getOperator();
try {
NodeSet inNodes = input.toNodeSet();
View
20 extensions/indexes/range/src/org/exist/xquery/modules/range/RangeIndexModule.java
@@ -1,8 +1,10 @@
package org.exist.xquery.modules.range;
+import org.exist.indexing.range.RangeIndex;
import org.exist.xquery.AbstractInternalModule;
import org.exist.xquery.FunctionDef;
+import java.util.HashMap;
import java.util.List;
import java.util.Map;
@@ -18,14 +20,32 @@
new FunctionDef(Lookup.signatures[2], Lookup.class),
new FunctionDef(Lookup.signatures[3], Lookup.class),
new FunctionDef(Lookup.signatures[4], Lookup.class),
+ new FunctionDef(Lookup.signatures[5], Lookup.class),
+ new FunctionDef(Lookup.signatures[6], Lookup.class),
+ new FunctionDef(Lookup.signatures[7], Lookup.class),
new FunctionDef(FieldLookup.signatures[0], FieldLookup.class),
new FunctionDef(FieldLookup.signatures[1], FieldLookup.class),
new FunctionDef(FieldLookup.signatures[2], FieldLookup.class),
new FunctionDef(FieldLookup.signatures[3], FieldLookup.class),
new FunctionDef(FieldLookup.signatures[4], FieldLookup.class),
+ new FunctionDef(FieldLookup.signatures[5], FieldLookup.class),
+ new FunctionDef(FieldLookup.signatures[6], FieldLookup.class),
+ new FunctionDef(FieldLookup.signatures[7], FieldLookup.class),
new FunctionDef(Optimize.signature, Optimize.class)
};
+ /**
+ * Maps the operator part of a lookup function's local name to the range
+ * index operator it stands for. Lookup uses the local name as the key
+ * directly; FieldLookup strips the "field-" prefix first.
+ * The map is a plain HashMap filled once in the static initializer below.
+ */
+ public final static Map<String, RangeIndex.Operator> OPERATOR_MAP = new HashMap<String, RangeIndex.Operator>();
+ static {
+ OPERATOR_MAP.put("eq", RangeIndex.Operator.EQ);
+ OPERATOR_MAP.put("lt", RangeIndex.Operator.LT);
+ OPERATOR_MAP.put("gt", RangeIndex.Operator.GT);
+ OPERATOR_MAP.put("ge", RangeIndex.Operator.GE);
+ OPERATOR_MAP.put("le", RangeIndex.Operator.LE);
+ OPERATOR_MAP.put("starts-with", RangeIndex.Operator.STARTS_WITH);
+ OPERATOR_MAP.put("ends-with", RangeIndex.Operator.ENDS_WITH);
+ OPERATOR_MAP.put("contains", RangeIndex.Operator.CONTAINS);
+ }
+
public RangeIndexModule(Map<String, List<? extends Object>> parameters) {
super(functions, parameters, false);
}
View
104 extensions/indexes/range/src/org/exist/xquery/modules/range/RangeQueryRewriter.java
@@ -45,13 +45,13 @@ public boolean rewriteLocationStep(LocationStep locationStep) throws XPathExcept
// can only optimize predicates with one expression
break;
}
+
Expression innerExpr = pred.getExpression(0);
- if (!(innerExpr instanceof GeneralComparison)) {
+ List<LocationStep> steps = getStepsToOptimize(innerExpr);
+ if (steps == null) {
+ // no optimizable steps found
break;
}
- GeneralComparison comparison = (GeneralComparison) innerExpr;
- List<LocationStep> steps = BasicExpressionVisitor.findLocationSteps(comparison.getLeft());
-
// compute left hand path
NodePath innerPath = toNodePath(steps);
if (innerPath == null) {
@@ -71,16 +71,12 @@ public boolean rewriteLocationStep(LocationStep locationStep) throws XPathExcept
if (rice != null && !rice.isComplex()) {
// found simple index configuration: replace with call to lookup function
// collect arguments
- ArrayList<Expression> eqArgs = new ArrayList<Expression>(2);
- eqArgs.add(comparison.getLeft());
- eqArgs.add(comparison.getRight());
- Lookup func = Lookup.create(comparison.getContext(), comparison.getRelation());
+ Lookup func = rewrite(innerExpr);
// preserve original comparison: may need it for in-memory lookups
- func.setFallback(comparison);
- func.setLocation(comparison.getLine(), comparison.getColumn());
- func.setArguments(eqArgs);
+ func.setFallback(innerExpr);
+ func.setLocation(innerExpr.getLine(), innerExpr.getColumn());
// replace comparison with range:eq
- pred.replace(comparison, new InternalFunctionCall(func));
+ pred.replace(innerExpr, new InternalFunctionCall(func));
}
}
}
@@ -91,7 +87,7 @@ public boolean rewriteLocationStep(LocationStep locationStep) throws XPathExcept
private boolean tryRewriteToFields(LocationStep locationStep, RewritableExpression parentExpr, List<Predicate> preds, NodePath contextPath) throws XPathException {
// without context path, we cannot rewrite the entire query
if (contextPath != null) {
- int operator = -1;
+ RangeIndex.Operator operator = null;
List<Expression> args = null;
SequenceConstructor arg0 = null;
@@ -103,11 +99,10 @@ private boolean tryRewriteToFields(LocationStep locationStep, RewritableExpressi
return false;
}
Expression innerExpr = pred.getExpression(0);
- if (!(innerExpr instanceof GeneralComparison)) {
+ List<LocationStep> steps = getStepsToOptimize(innerExpr);
+ if (steps == null) {
return false;
}
- GeneralComparison comparison = (GeneralComparison) innerExpr;
- List<LocationStep> steps = BasicExpressionVisitor.findLocationSteps(comparison.getLeft());
// compute left hand path
NodePath innerPath = toNodePath(steps);
if (innerPath == null) {
@@ -124,14 +119,15 @@ private boolean tryRewriteToFields(LocationStep locationStep, RewritableExpressi
// check for a matching sub-path and retrieve field information
RangeIndexConfigField field = ((ComplexRangeIndexConfigElement) rice).getField(path);
if (field != null) {
- if (comparison.getRelation() != operator) {
- if (operator > -1) {
+ RangeIndex.Operator currentOperator = getOperator(innerExpr);
+ if (currentOperator != operator) {
+ if (operator != null) {
// wrong operator: cannot optimize. break out.
- operator = -1;
+ operator = null;
args = null;
return false;
} else {
- operator = comparison.getRelation();
+ operator = currentOperator;
}
}
if (args == null) {
@@ -143,7 +139,7 @@ private boolean tryRewriteToFields(LocationStep locationStep, RewritableExpressi
// field is added to the sequence in first parameter
arg0.add(new LiteralValue(getContext(), new StringValue(field.getName())));
// append right hand expression as additional parameter
- args.add(comparison.getRight());
+ args.add(getKeyArg(innerExpr));
} else {
return false;
}
@@ -170,6 +166,72 @@ private boolean tryRewriteToFields(LocationStep locationStep, RewritableExpressi
return false;
}
+ private Lookup rewrite(Expression expression) throws XPathException {
+ ArrayList<Expression> eqArgs = new ArrayList<Expression>(2);
+ if (expression instanceof GeneralComparison) {
+ GeneralComparison comparison = (GeneralComparison) expression;
+ eqArgs.add(comparison.getLeft());
+ eqArgs.add(comparison.getRight());
+ Lookup func = Lookup.create(comparison.getContext(), getOperator(expression));
+ func.setArguments(eqArgs);
+ return func;
+ }
+ return null;
+ }
+
+ /**
+  * Extract the key operand (the right-hand side) of a comparison.
+  *
+  * @param expression the predicate expression to inspect
+  * @return the right operand, or {@code null} if the expression is not a
+  *         {@link GeneralComparison}
+  */
+ private Expression getKeyArg(Expression expression) {
+     return expression instanceof GeneralComparison
+             ? ((GeneralComparison) expression).getRight()
+             : null;
+ }
+
+ /**
+  * Collect the location steps of a comparison's left-hand side.
+  *
+  * @param expr the predicate expression to inspect
+  * @return the steps found in the left operand, or {@code null} if the
+  *         expression is not a {@link GeneralComparison}
+  */
+ private List<LocationStep> getStepsToOptimize(Expression expr) {
+     if (!(expr instanceof GeneralComparison)) {
+         return null;
+     }
+     return BasicExpressionVisitor.findLocationSteps(((GeneralComparison) expr).getLeft());
+ }
+
+ private RangeIndex.Operator getOperator(Expression expr) {
+ RangeIndex.Operator operator = RangeIndex.Operator.EQ;
+ if (expr instanceof GeneralComparison) {
+ GeneralComparison comparison = (GeneralComparison) expr;
+ int relation = comparison.getRelation();
+ switch(relation) {
+ case Constants.LT:
+ operator = RangeIndex.Operator.LT;
+ break;
+ case Constants.GT:
+ operator = RangeIndex.Operator.GT;
+ break;
+ case Constants.LTEQ:
+ operator = RangeIndex.Operator.LE;
+ break;
+ case Constants.GTEQ:
+ operator = RangeIndex.Operator.GE;
+ break;
+ case Constants.EQ:
+ switch (comparison.getTruncation()) {
+ case Constants.TRUNC_BOTH:
+ operator = RangeIndex.Operator.CONTAINS;
+ break;
+ case Constants.TRUNC_LEFT:
+ operator = RangeIndex.Operator.ENDS_WITH;
+ break;
+ case Constants.TRUNC_RIGHT:
+ operator = RangeIndex.Operator.STARTS_WITH;
+ break;
+ default:
+ operator = RangeIndex.Operator.EQ;
+ break;
+ }
+ break;
+ }
+ }
+ return operator;
+ }
/**
* Scan all index configurations to find one matching path.
*/
View
48 extensions/indexes/range/test/src/xquery/optimizer.xql
@@ -98,6 +98,30 @@ function ot:optimize-eq-string($name as xs:string) {
declare
%test:stats
+ %test:args("Rudi")
+ %test:assertXPath("$result//stats:index[@type = 'new-range'][@optimization = 2]")
+function ot:optimize-starts-with-string($name as xs:string) {
+ collection($ot:COLLECTION)//address[starts-with(name, $name)]
+};
+
+(: ends-with() on address/name: the recorded query stats must contain a
+   'new-range' index entry with @optimization = 2. :)
+declare
+ %test:stats
+ %test:args("Rüssel")
+ %test:assertXPath("$result//stats:index[@type = 'new-range'][@optimization = 2]")
+function ot:optimize-ends-with-string($name as xs:string) {
+ collection($ot:COLLECTION)//address[ends-with(name, $name)]
+};
+
+(: contains() on address/name: the recorded query stats must contain a
+   'new-range' index entry with @optimization = 2. :)
+declare
+ %test:stats
+ %test:args("udi ")
+ %test:assertXPath("$result//stats:index[@type = 'new-range'][@optimization = 2]")
+function ot:optimize-contains-string($name as xs:string) {
+ collection($ot:COLLECTION)//address[contains(name, $name)]
+};
+
+declare
+ %test:stats
%test:args("Rudi Rüssel")
%test:assertXPath("$result//stats:index[@type = 'new-range'][@optimization = 2]")
function ot:optimize-eq-string-self($name as xs:string) {
@@ -249,6 +273,30 @@ function ot:le-field($city as xs:string) {
declare
%test:stats
+ %test:args("Rüssel")
+ %test:assertXPath("$result//stats:index[@type = 'new-range'][@optimization = 2]")
+function ot:optimize-starts-with-field($city as xs:string) {
+ collection($ot:COLLECTION)//address[starts-with(city, $city)]
+};
+
+(: ends-with() on address/city: the recorded query stats must contain a
+   'new-range' index entry with @optimization = 2. :)
+declare
+ %test:stats
+ %test:args("heim")
+ %test:assertXPath("$result//stats:index[@type = 'new-range'][@optimization = 2]")
+function ot:optimize-ends-with-field($city as xs:string) {
+ collection($ot:COLLECTION)//address[ends-with(city, $city)]
+};
+
+(: contains() on address/city: the recorded query stats must contain a
+   'new-range' index entry with @optimization = 2. :)
+declare
+ %test:stats
+ %test:args("üssel")
+ %test:assertXPath("$result//stats:index[@type = 'new-range'][@optimization = 2]")
+function ot:optimize-contains-field($city as xs:string) {
+ collection($ot:COLLECTION)//address[contains(city, $city)]
+};
+
+declare
+ %test:stats
%test:args("Rüsselsheim", "Elefantenweg 67")
%test:assertXPath("$result//stats:index[@type = 'new-range'][@optimization = 2]")
function ot:optimize-eq-field-multi($city as xs:string, $street as xs:string) {
View
54 extensions/indexes/range/test/src/xquery/range.xql
@@ -232,6 +232,60 @@ function rt:le-integer($code as xs:integer) {
count(//address[range:le(city/@code, $code)])
};
+(: range:starts-with on address/name: for each %test:args prefix, the city
+   text of the selected address must equal the paired %test:assertEquals value. :)
+declare
+ %test:args("Rudi")
+ %test:assertEquals("Rüsselsheim")
+ %test:args("Berta")
+ %test:assertEquals("Almweide")
+function rt:starts-with-string($name as xs:string) {
+ //address[range:starts-with(name, $name)]/city/text()
+};
+
+(: range:ends-with on address/name: for each %test:args suffix, the city
+   text of the selected address must equal the paired %test:assertEquals value. :)
+declare
+ %test:args("Rüssel")
+ %test:assertEquals("Rüsselsheim")
+ %test:args("Muh")
+ %test:assertEquals("Almweide")
+function rt:ends-with-string($name as xs:string) {
+ //address[range:ends-with(name, $name)]/city/text()
+};
+
+(: range:contains on address/name: for each %test:args substring, the city
+   text of the selected address must equal the paired %test:assertEquals value. :)
+declare
+ %test:args("üss")
+ %test:assertEquals("Rüsselsheim")
+ %test:args("ta M")
+ %test:assertEquals("Almweide")
+function rt:contains-string($name as xs:string) {
+ //address[range:contains(name, $name)]/city/text()
+};
+
+(: range:field-starts-with on the "address-name" field: for each %test:args
+   prefix, the city text below the returned node must equal the paired
+   %test:assertEquals value. :)
+declare
+ %test:args("Rudi")
+ %test:assertEquals("Rüsselsheim")
+ %test:args("Berta")
+ %test:assertEquals("Almweide")
+function rt:field-starts-with-string($name as xs:string) {
+ range:field-starts-with("address-name", $name)/city/text()
+};
+
+(: range:field-ends-with on the "address-name" field: for each %test:args
+   suffix, the city text below the returned node must equal the paired
+   %test:assertEquals value. :)
+declare
+ %test:args("Rüssel")
+ %test:assertEquals("Rüsselsheim")
+ %test:args("Muh")
+ %test:assertEquals("Almweide")
+function rt:field-ends-with-string($name as xs:string) {
+ range:field-ends-with("address-name", $name)/city/text()
+};
+
+(: range:field-contains on the "address-name" field: for each %test:args
+   substring, the city text below the returned node must equal the paired
+   %test:assertEquals value. :)
+declare
+ %test:args("üss")
+ %test:assertEquals("Rüsselsheim")
+ %test:args("ta M")
+ %test:assertEquals("Almweide")
+function rt:field-contains-string($name as xs:string) {
+ range:field-contains("address-name", $name)/city/text()
+};
+
declare
%test:assertEquals("Almweide")
function rt:remove-document() {
View
3 src/org/exist/xquery/GeneralComparison.java
@@ -305,6 +305,9 @@ public int getRelation()
return( this.relation );
}
+ /**
+ * Return the truncation setting of this comparison.
+ *
+ * @return the value of the {@code truncation} field; the range query
+ *         rewriter compares it against Constants.TRUNC_BOTH,
+ *         Constants.TRUNC_LEFT and Constants.TRUNC_RIGHT
+ */
+ public int getTruncation() {
+ return truncation;
+ }
public NodeSet preSelect( Sequence contextSequence, boolean useContext ) throws XPathException
{

0 comments on commit 6c04b99

Please sign in to comment.
Something went wrong with that request. Please try again.