Navigation Menu

Skip to content

Commit

Permalink
[feature] Lucene index: make query parser configurable.
Browse files Browse the repository at this point in the history
  • Loading branch information
wolfgangmm committed Oct 17, 2013
1 parent 7d256e9 commit 37f766c
Show file tree
Hide file tree
Showing 3 changed files with 87 additions and 9 deletions.
@@ -1,5 +1,7 @@
package org.exist.indexing.lucene;

import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
Expand All @@ -11,6 +13,9 @@

import org.apache.log4j.Logger;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.queryparser.classic.QueryParserBase;
import org.apache.lucene.util.Version;
import org.exist.dom.QName;
import org.exist.indexing.lucene.analyzers.NoDiacriticsStandardAnalyzer;
import org.exist.storage.NodePath;
Expand All @@ -26,6 +31,7 @@ public class LuceneConfig {
private final static String CONFIG_ROOT = "lucene";
private final static String INDEX_ELEMENT = "text";
private final static String ANALYZER_ELEMENT = "analyzer";
private final static String PARSER_ELEMENT = "parser";
protected final static String FIELD_TYPE_ELEMENT = "fieldType";
private static final String INLINE_ELEMENT = "inline";
private static final String IGNORE_ELEMENT = "ignore";
Expand All @@ -47,6 +53,8 @@ public class LuceneConfig {

private AnalyzerConfig analyzers = new AnalyzerConfig();

private String queryParser = null;

/**
 * Creates a Lucene index configuration by handing the given configuration
 * nodes straight to {@link #parseConfig(NodeList, Map)}.
 *
 * @param configNodes the configuration nodes to parse
 * @param namespaces  namespace map passed through to the parser
 *                    (presumably prefix to URI; verify against caller)
 * @throws DatabaseConfigurationException if parsing the configuration fails
 */
public LuceneConfig(NodeList configNodes, Map<String, String> namespaces) throws DatabaseConfigurationException {
parseConfig(configNodes, namespaces);
}
Expand Down Expand Up @@ -148,7 +156,38 @@ public Analyzer getAnalyzer(String field) {
/**
 * Looks up an analyzer by its id in this configuration's analyzer registry.
 * This is a plain delegate to {@code AnalyzerConfig#getAnalyzerById}.
 *
 * @param id the analyzer id to look up
 * @return whatever the analyzer registry returns for {@code id}
 *         (NOTE(review): behaviour for an unknown id is not visible here; confirm)
 */
public Analyzer getAnalyzerById(String id) {
return analyzers.getAnalyzerById(id);
}


/**
 * Creates the query parser to use for the given field.
 *
 * If a parser class name has been configured (the {@code class} attribute of a
 * {@code parser} element), that class is loaded reflectively and instantiated
 * through its {@code (Version, String, Analyzer)} constructor. In every other
 * case, and whenever loading or instantiation fails, the classic Lucene
 * {@link QueryParser} is returned, so this method never returns {@code null}.
 *
 * @param field    the default field the parser should query
 * @param analyzer the analyzer the parser should use
 * @return the configured parser, or a default {@link QueryParser} as fallback
 */
public QueryParserBase getQueryParser(String field, Analyzer analyzer) {
    QueryParserBase parser = null;
    // Per the DOM API, Element.getAttribute() yields "" when the attribute is
    // missing, so an empty name means "nothing configured". Without this guard
    // every query would log a ClassNotFoundException stack trace.
    if (queryParser != null && queryParser.length() > 0) {
        try {
            final Class<?> clazz = Class.forName(queryParser);
            if (QueryParserBase.class.isAssignableFrom(clazz)) {
                final Constructor<?> cstr =
                        clazz.getDeclaredConstructor(Version.class, String.class, Analyzer.class);
                parser = (QueryParserBase) cstr.newInstance(LuceneIndex.LUCENE_VERSION_IN_USE, field, analyzer);
            } else {
                // Previously this misconfiguration was ignored without any trace in the log.
                LOG.warn("Configured lucene query parser class " + queryParser
                        + " is not a subclass of " + QueryParserBase.class.getName()
                        + "; using default query parser");
            }
        } catch (ClassNotFoundException e) {
            LOG.warn("Failed to instantiate lucene query parser class: " + queryParser, e);
        } catch (NoSuchMethodException e) {
            LOG.warn("Failed to instantiate lucene query parser class: " + queryParser + ": " + e.getMessage(), e);
        } catch (InstantiationException e) {
            LOG.warn("Failed to instantiate lucene query parser class: " + queryParser + ": " + e.getMessage(), e);
        } catch (IllegalAccessException e) {
            LOG.warn("Failed to instantiate lucene query parser class: " + queryParser, e);
        } catch (InvocationTargetException e) {
            LOG.warn("Failed to instantiate lucene query parser class: " + queryParser, e);
        }
    }
    if (parser == null) {
        // use default parser
        parser = new QueryParser(LuceneIndex.LUCENE_VERSION_IN_USE, field, analyzer);
    }
    return parser;
}

/**
 * Tells whether the given qualified name is registered as an inline node.
 *
 * @param qname the name to test
 * @return {@code true} only if a set of inline nodes exists and contains {@code qname}
 */
public boolean isInlineNode(QName qname) {
    if (inlineNodes == null) {
        // no inline nodes registered at all
        return false;
    }
    return inlineNodes.contains(qname);
}
Expand Down Expand Up @@ -200,7 +239,10 @@ protected void parseConfig(NodeList configNodes, Map<String, String> namespaces)

} else if (ANALYZER_ELEMENT.equals(node.getLocalName())) {
analyzers.addAnalyzer((Element) node);


} else if (PARSER_ELEMENT.equals(node.getLocalName())) {
queryParser = ((Element)node).getAttribute("class");

} else if (FIELD_TYPE_ELEMENT.equals(node.getLocalName())) {
FieldType type = new FieldType((Element) node, analyzers);
fieldTypes.put(type.getId(), type);
Expand Down
Expand Up @@ -31,6 +31,8 @@
import org.apache.lucene.index.*;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.queryparser.classic.QueryParserBase;
import org.apache.lucene.queryparser.flexible.standard.CommonQueryParserConfiguration;
import org.apache.lucene.search.*;
import org.apache.lucene.util.*;
import org.exist.collections.Collection;
Expand Down Expand Up @@ -387,7 +389,7 @@ public NodeSet query(XQueryContext context, int contextId, DocumentSet docs, Nod
for (QName qname : qnames) {
String field = LuceneUtil.encodeQName(qname, index.getBrokerPool().getSymbols());
Analyzer analyzer = getAnalyzer(null, qname, context.getBroker(), docs);
QueryParser parser = new QueryParser(LuceneIndex.LUCENE_VERSION_IN_USE, field, analyzer);
QueryParserBase parser = getQueryParser(field, analyzer, docs);
setOptions(options, parser);
Query query = parser.parse(queryStr);
searchAndProcess(contextId, qname, docs, contextSet, resultSet,
Expand All @@ -399,15 +401,15 @@ public NodeSet query(XQueryContext context, int contextId, DocumentSet docs, Nod
return resultSet;
}

protected void setOptions(Properties options, QueryParser parser) throws ParseException {
protected void setOptions(Properties options, CommonQueryParserConfiguration parser) throws ParseException {
if (options == null)
return;
String option = options.getProperty(OPTION_DEFAULT_OPERATOR);
if (option != null) {
if (option != null && parser instanceof QueryParserBase) {
if (DEFAULT_OPERATOR_OR.equals(option))
parser.setDefaultOperator(QueryParser.OR_OPERATOR);
((QueryParserBase)parser).setDefaultOperator(QueryParser.OR_OPERATOR);
else
parser.setDefaultOperator(QueryParser.AND_OPERATOR);
((QueryParserBase)parser).setDefaultOperator(QueryParser.AND_OPERATOR);
}
option = options.getProperty(OPTION_LEADING_WILDCARD);
if (option != null)
Expand Down Expand Up @@ -509,7 +511,7 @@ public NodeSet queryField(XQueryContext context, int contextId, DocumentSet docs
searcher = index.getSearcher();
Analyzer analyzer = getAnalyzer(field, null, context.getBroker(), docs);
LOG.debug("Using analyzer " + analyzer + " for " + queryString);
QueryParser parser = new QueryParser(LuceneIndex.LUCENE_VERSION_IN_USE, field, analyzer);
QueryParserBase parser = getQueryParser(field, analyzer, docs);
setOptions(options, parser);
Query query = parser.parse(queryString);
searchAndProcess(contextId, null, docs, contextSet, resultSet,
Expand Down Expand Up @@ -633,7 +635,7 @@ public NodeImpl search(final XQueryContext context, final List<String> toBeMatch
final Analyzer searchAnalyzer = new StandardAnalyzer(Version.LUCENE_43);

// Setup query Version, default field, analyzer
final QueryParser parser = new QueryParser(Version.LUCENE_43, "", searchAnalyzer);
final QueryParserBase parser = getQueryParser("", searchAnalyzer, null);
final Query query = parser.parse(queryText);

// extract all used fields from query
Expand Down Expand Up @@ -992,6 +994,26 @@ protected Analyzer getAnalyzer(String field, QName qname, DBBroker broker, Docum
return index.getDefaultAnalyzer();
}

/**
 * Resolves the query parser for a query against the given documents.
 *
 * Walks the collections of {@code docs} and returns the parser supplied by the
 * first collection that carries a Lucene index configuration yielding a
 * non-null parser. If {@code docs} is {@code null} or no collection provides
 * one, a classic {@link QueryParser} is created instead.
 *
 * NOTE(review): the two-argument {@code LuceneConfig#getQueryParser} appears to
 * always fall back to a default parser rather than return {@code null}, so the
 * first collection with any Lucene configuration presumably decides the result.
 * Confirm this is intended.
 *
 * @param field    the default field for the parser
 * @param analyzer the analyzer the parser should use
 * @param docs     the documents being queried; may be {@code null}
 * @return a query parser, never {@code null}
 */
protected QueryParserBase getQueryParser(String field, Analyzer analyzer, DocumentSet docs) {
    if (docs != null) {
        final Iterator<Collection> collections = docs.getCollectionIterator();
        while (collections.hasNext()) {
            final IndexSpec spec = collections.next().getIndexConfiguration(broker);
            if (spec == null) {
                continue;
            }
            final LuceneConfig luceneConf = (LuceneConfig) spec.getCustomIndexSpec(LuceneIndex.ID);
            if (luceneConf == null) {
                continue;
            }
            final QueryParserBase configured = luceneConf.getQueryParser(field, analyzer);
            if (configured != null) {
                return configured;
            }
        }
    }
    // not found. return default query parser:
    return new QueryParser(LuceneIndex.LUCENE_VERSION_IN_USE, field, analyzer);
}

/**
 * Placeholder implementation: performs no check and always reports {@code false}.
 *
 * @param broker not used by this implementation
 * @return always {@code false}
 */
public boolean checkIndex(DBBroker broker) {
// NOTE(review): stub body that previously carried only IDE template text; presumably still to be implemented
return false;
}
Expand Down
14 changes: 14 additions & 0 deletions extensions/indexes/lucene/test/src/xquery/lucene/analyzers.xql
Expand Up @@ -9,6 +9,7 @@ declare variable $analyze:XCONF1 :=
<index xmlns:xs="http://www.w3.org/2001/XMLSchema">
<fulltext default="none" attributes="false"/>
<lucene diacritics="no">
<parser class="org.apache.lucene.queryparser.analyzing.AnalyzingQueryParser"/>
<text qname="p"/>
</lucene>
</index>
Expand Down Expand Up @@ -92,4 +93,17 @@ declare
%test:assertEquals(1)
function analyze:diacrictics($term as xs:string) {
count(collection("/db/lucenetest/test2")//p[ft:query(., $term)])
};

(:~
 : Counts the p elements in /db/lucenetest/test1 that match the given
 : full-text query term. Each annotated argument, with or without diacritics
 : and ending in a wildcard, is expected to match two elements.
 :
 : @param $term the Lucene query string passed to ft:query
 : @return the number of matching p elements
 :)
declare
    %test:args("rüssels*")
    %test:assertEquals(2)
    %test:args("russels*")
    %test:assertEquals(2)
    %test:args("maor*")
    %test:assertEquals(2)
    %test:args("Māor*")
    %test:assertEquals(2)
function analyze:query-parser($term as xs:string) {
    let $matches := collection("/db/lucenetest/test1")//p[ft:query(., $term)]
    return
        count($matches)
};

0 comments on commit 37f766c

Please sign in to comment.