Skip to content

Commit

Permalink
Optimized queries on filters using different operators.
Browse files Browse the repository at this point in the history
  • Loading branch information
wolfgangmm committed Aug 29, 2013
1 parent 7e1858c commit 9257c27
Show file tree
Hide file tree
Showing 18 changed files with 488 additions and 188 deletions.
@@ -1,3 +1,24 @@
/*
* eXist Open Source Native XML Database
* Copyright (C) 2013 The eXist Project
* http://exist-db.org
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
* $Id$
*/
package org.exist.indexing.range;

import org.apache.log4j.Logger;
Expand Down
Expand Up @@ -18,15 +18,15 @@ public class ComplexTextCollector implements TextCollector {

/**
 * Creates a text collector bound to a complex range index configuration.
 *
 * @param configuration the index configuration stored in {@code config}
 * @param parentPath the path which is copied into a new {@link NodePath}
 *                   and kept as this collector's parent path
 */
public ComplexTextCollector(ComplexRangeIndexConfigElement configuration, NodePath parentPath) {
this.config = configuration;
// NOTE(review): this listing is a rendered diff without +/- markers; the two
// assignments below appear to be the pre- and post-change versions of one line.
// The meaning of the extra boolean argument is not visible here -- confirm
// against the NodePath(NodePath, boolean) constructor.
this.parentPath = new NodePath(parentPath);
this.parentPath = new NodePath(parentPath, false);
}

@Override
public void startElement(QName qname, NodePath path) {
RangeIndexConfigField fieldConf = config.getField(parentPath, path);
if (fieldConf != null) {
currentField = fieldConf;
Field field = new Field(currentField.getName(), false, fieldConf.whitespaceTreatment());
Field field = new Field(currentField.getName(), false, fieldConf.whitespaceTreatment(), fieldConf.isCaseSensitive());
fields.add(field);
}

Expand All @@ -43,7 +43,7 @@ public void endElement(QName qname, NodePath path) {
public void attribute(AttrImpl attribute, NodePath path) {
RangeIndexConfigField fieldConf = config.getField(parentPath, path);
if (fieldConf != null) {
Field field = new Field(fieldConf.getName(), true, fieldConf.whitespaceTreatment());
Field field = new Field(fieldConf.getName(), true, fieldConf.whitespaceTreatment(), fieldConf.isCaseSensitive());
field.content.append(attribute.getValue());
fields.add(0, field);
}
Expand Down
@@ -1,3 +1,24 @@
/*
* eXist Open Source Native XML Database
* Copyright (C) 2013 The eXist Project
* http://exist-db.org
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
* $Id$
*/
package org.exist.indexing.range;

import org.apache.log4j.Logger;
Expand All @@ -7,21 +28,41 @@
import org.exist.indexing.lucene.LuceneIndex;
import org.exist.storage.DBBroker;

/**
* Main implementation class for the new range index. This extends the existing LuceneIndex.
*
* @author Wolfgang Meier
*/
public class RangeIndex extends LuceneIndex {

private static final Logger LOG = Logger.getLogger(RangeIndex.class);

public final static String ID = RangeIndex.class.getName();

/**
* Enumeration of supported operators and optimized functions.
*
* Each constant in the parameterized list carries a name string, which is
* what {@link #toString()} returns.
*/
public enum Operator {
// NOTE(review): this listing is a rendered diff without +/- markers. The bare
// constants (GT .. CONTAINS) appear to be the removed version of the list; the
// constants with a name argument below appear to be the replacement.
GT,
LT,
EQ,
GE,
LE,
ENDS_WITH,
STARTS_WITH,
CONTAINS
GT ("gt"),
LT ("lt"),
EQ ("eq"),
GE ("ge"),
LE ("le"),
ENDS_WITH ("ends-with"),
STARTS_WITH ("starts-with"),
CONTAINS ("contains"),
MATCH ("matches");

// The name string associated with the constant.
private final String name;

/**
* @param name the name string to associate with the constant
*/
Operator(String name) {
this.name = name;
}

/**
* @return the name string passed to the constructor, e.g. "starts-with"
*/
@Override
public String toString() {
return name;
}
};

private static final String DIR_NAME = "range";
Expand Down
Expand Up @@ -20,21 +20,13 @@

public class RangeIndexConfigElement {

/**
 * Shared Lucene field type: indexed, not stored, norms omitted, no term
 * vectors, tokenized.
 *
 * NOTE(review): the hunk header (21 lines before, 13 after) suggests this
 * declaration and its static initializer were removed by this commit -- confirm.
 */
public static final org.apache.lucene.document.FieldType TYPE_CONTENT = new org.apache.lucene.document.FieldType();
static {
TYPE_CONTENT.setIndexed(true);
TYPE_CONTENT.setStored(false);
TYPE_CONTENT.setOmitNorms(true);
TYPE_CONTENT.setStoreTermVectors(false);
TYPE_CONTENT.setTokenized(true);
}

protected NodePath path = null;
private int type = Type.STRING;
private RangeIndexConfigElement nextConfig = null;
protected boolean isQNameIndex = false;
protected Analyzer analyzer = null;
protected boolean includeNested = false;
protected boolean caseSensitive = true;
protected int wsTreatment = XMLString.SUPPRESS_NONE;

public RangeIndexConfigElement(Element node, Map<String, String> namespaces) throws DatabaseConfigurationException {
Expand Down Expand Up @@ -81,6 +73,11 @@ public RangeIndexConfigElement(Element node, Map<String, String> namespaces) thr
wsTreatment = XMLString.NORMALIZE;
}
}

String caseStr = node.getAttribute("case");
if (caseStr != null && caseStr.length() > 0) {
caseSensitive = caseStr.equalsIgnoreCase("yes");
}
}

public Field convertToField(String fieldName, String content) throws IOException {
Expand Down Expand Up @@ -151,7 +148,7 @@ public static BytesRef convertToBytes(AtomicValue content) throws XPathException
}

/**
 * Creates a new {@link SimpleTextCollector} configured from this element's
 * settings.
 *
 * @param path not used by the statements visible in this method
 * @return a freshly created collector
 */
public TextCollector getCollector(NodePath path) {
// NOTE(review): rendered diff without +/- markers; the first return appears to
// be the old line, the second the new one which also passes caseSensitive.
return new SimpleTextCollector(this, includeNested, wsTreatment);
return new SimpleTextCollector(this, includeNested, wsTreatment, caseSensitive);
}

public Analyzer getAnalyzer() {
Expand Down
@@ -1,12 +1,8 @@
package org.exist.indexing.range;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.collation.CollationKeyAnalyzer;
import org.apache.lucene.util.Version;
import org.exist.dom.QName;
import org.exist.indexing.lucene.LuceneIndexConfig;
import org.exist.storage.NodePath;
import org.exist.util.Collations;
import org.exist.util.DatabaseConfigurationException;
import org.exist.util.XMLString;
import org.exist.xquery.XPathException;
Expand All @@ -24,6 +20,7 @@ public class RangeIndexConfigField {
protected boolean includeNested = false;
protected int wsTreatment = XMLString.SUPPRESS_NONE;
protected boolean isQNameIndex = false;
protected boolean caseSensitive = true;

public RangeIndexConfigField(NodePath parentPath, Element elem, Map<String, String> namespaces) throws DatabaseConfigurationException {
name = elem.getAttribute("name");
Expand All @@ -47,6 +44,8 @@ public RangeIndexConfigField(NodePath parentPath, Element elem, Map<String, Stri
path = new NodePath(qname);
relPath = path;
isQNameIndex = true;
} else {
path = parentPath;
}
String typeStr = elem.getAttribute("type");
if (typeStr != null && typeStr.length() > 0) {
Expand All @@ -58,6 +57,7 @@ public RangeIndexConfigField(NodePath parentPath, Element elem, Map<String, Stri
}
String nested = elem.getAttribute("nested");
includeNested = (nested == null || nested.equalsIgnoreCase("yes"));
path.setIncludeDescendants(includeNested);

// normalize whitespace if whitespace="normalize"
String whitespace = elem.getAttribute("whitespace");
Expand All @@ -68,6 +68,11 @@ public RangeIndexConfigField(NodePath parentPath, Element elem, Map<String, Stri
wsTreatment = XMLString.NORMALIZE;
}
}

String caseStr = elem.getAttribute("case");
if (caseStr != null && caseStr.length() > 0) {
caseSensitive = caseStr.equalsIgnoreCase("yes");
}
}

public String getName() {
Expand All @@ -78,10 +83,6 @@ public NodePath getPath() {
return path;
}

public NodePath getRelPath() {
return relPath;
}

/**
 * @return the value of the {@code type} field of this field configuration
 *         (presumably one of the XQuery {@code Type} constants -- confirm
 *         against the constructor's handling of the "type" attribute)
 */
public int getType() {
return type;
}
Expand All @@ -91,15 +92,23 @@ public boolean match(NodePath other) {
}

/**
 * Checks whether {@code other} matches this field, resolved relative to
 * {@code parentPath}.
 *
 * @param parentPath the path the field's relative path is appended to
 * @param other the path to test
 * @return the result of {@code NodePath.match} on the resolved path
 */
public boolean match(NodePath parentPath, NodePath other) {
// NOTE(review): rendered diff without +/- markers. The three statements below
// appear to be the old body; the if/else after them appears to be the new body.
NodePath absPath = new NodePath(parentPath);
absPath.append(relPath);
return absPath.match(other);
// No relative path: match against the parent path itself.
if (relPath == null) {
return parentPath.match(other);
} else {
// Build parentPath + relPath on a copy so parentPath is not modified here.
NodePath absPath = new NodePath(parentPath);
absPath.append(relPath);
return absPath.match(other);
}
}

/**
 * @return the whitespace treatment configured for this field; the field is
 *         initialized to {@code XMLString.SUPPRESS_NONE} and the constructor
 *         may set it to another {@code XMLString} constant such as
 *         {@code XMLString.NORMALIZE}
 */
public int whitespaceTreatment() {
return wsTreatment;
}

/**
 * @return the case sensitivity flag of this field; it is {@code true} unless a
 *         non-empty "case" attribute with a value other than "yes" (compared
 *         ignoring case) was present on the configuration element
 */
public boolean isCaseSensitive() {
return caseSensitive;
}

/**
 * @return the flag derived from the "nested" attribute of the configuration
 *         element: {@code true} if the attribute value was {@code null} or
 *         equal to "yes" (ignoring case)
 */
public boolean includeNested() {
return includeNested;
}
Expand Down
@@ -1,3 +1,24 @@
/*
* eXist Open Source Native XML Database
* Copyright (C) 2013 The eXist Project
* http://exist-db.org
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
* $Id$
*/
package org.exist.indexing.range;

import org.exist.dom.QName;
Expand Down
@@ -1,3 +1,24 @@
/*
* eXist Open Source Native XML Database
* Copyright (C) 2013 The eXist Project
* http://exist-db.org
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
* $Id$
*/
package org.exist.indexing.range;

import org.apache.log4j.Logger;
Expand Down Expand Up @@ -36,6 +57,11 @@
import java.io.StringReader;
import java.util.*;

/**
* The main worker class for the range index.
*
* @author Wolfgang Meier
*/
public class RangeIndexWorker implements OrderedValuesIndex, QNamedKeysIndex {

private static final Logger LOG = Logger.getLogger(RangeIndexWorker.class);
Expand Down Expand Up @@ -95,6 +121,8 @@ public Query toQuery(String field, QName qname, AtomicValue content, RangeIndex.
query = new WildcardQuery(new Term(field, bytes));
query.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
return query;
case MATCH:
return new RegexpQuery(new Term(field, key));
}
}
if (operator == RangeIndex.Operator.EQ) {
Expand Down Expand Up @@ -446,7 +474,7 @@ public NodeSet query(int contextId, DocumentSet docs, NodeSet contextSet, List<Q
return resultSet;
}

public NodeSet queryField(int contextId, DocumentSet docs, NodeSet contextSet, Sequence fields, Sequence[] keys, RangeIndex.Operator operator, int axis) throws IOException, XPathException {
public NodeSet queryField(int contextId, DocumentSet docs, NodeSet contextSet, Sequence fields, Sequence[] keys, RangeIndex.Operator[] operators, int axis) throws IOException, XPathException {
NodeSet resultSet = NodeSet.EMPTY_SET;
IndexSearcher searcher = null;
try {
Expand All @@ -467,12 +495,12 @@ public NodeSet queryField(int contextId, DocumentSet docs, NodeSet contextSet, S
bool.setMinimumNumberShouldMatch(1);
for (SequenceIterator ki = keys[j].iterate(); ki.hasNext(); ) {
Item key = ki.nextItem();
Query q = toQuery(field, null, key.atomize(), operator, docs);
Query q = toQuery(field, null, key.atomize(), operators[j], docs);
bool.add(q, BooleanClause.Occur.SHOULD);
}
query.add(bool, BooleanClause.Occur.MUST);
} else {
Query q = toQuery(field, null, keys[j].itemAt(0).atomize(), operator, docs);
Query q = toQuery(field, null, keys[j].itemAt(0).atomize(), operators[j], docs);
query.add(q, BooleanClause.Occur.MUST);
}
}
Expand Down Expand Up @@ -744,7 +772,9 @@ public void startElement(Txn transaction, ElementImpl element, NodePath path) {
while (configIter.hasNext()) {
RangeIndexConfigElement configuration = configIter.next();
if (configuration.match(path)) {
contentStack.push(configuration.getCollector(path));
TextCollector collector = configuration.getCollector(path);
collector.startElement(element.getQName(), path);
contentStack.push(collector);
}
}
}
Expand Down
Expand Up @@ -15,11 +15,13 @@ public class SimpleTextCollector implements TextCollector {
private RangeIndexConfigElement config = null;
private XMLString buf = new XMLString();
private int wsTreatment = XMLString.SUPPRESS_NONE;
private boolean caseSensitive = true;

/**
 * Creates a collector and stores the given settings in the corresponding fields.
 *
 * @param config the range index configuration element
 * @param includeNested stored in the {@code includeNested} field
 * @param wsTreatment whitespace treatment, later passed to each created Field
 * @param caseSensitive case sensitivity flag, later passed to each created Field
 */
// NOTE(review): rendered diff without +/- markers; the first signature line
// appears to be the old three-argument version, the second its replacement.
public SimpleTextCollector(RangeIndexConfigElement config, boolean includeNested, int wsTreatment) {
public SimpleTextCollector(RangeIndexConfigElement config, boolean includeNested, int wsTreatment, boolean caseSensitive) {
this.config = config;
this.includeNested = includeNested;
this.wsTreatment = wsTreatment;
this.caseSensitive = caseSensitive;
}

public SimpleTextCollector(String content) {
Expand Down Expand Up @@ -53,7 +55,7 @@ public int length() {
/**
 * Wraps the collected text buffer in a {@code Field} and returns it in a
 * newly created list.
 *
 * @return a new list built from the buffer {@code buf}
 */
@Override
public List<Field> getFields() {
List<Field> fields = new ArrayList<Field>(1);
// NOTE(review): rendered diff without +/- markers; the first add appears to be
// the old line, the second the new one which also passes caseSensitive.
fields.add(new Field(buf, wsTreatment));
fields.add(new Field(buf, wsTreatment, caseSensitive));
return fields;
}
}

0 comments on commit 9257c27

Please sign in to comment.