From 37f766cb7b29e6c28bc1b99b41d3b2b9f595e7a2 Mon Sep 17 00:00:00 2001 From: Wolfgang Meier Date: Thu, 17 Oct 2013 18:15:35 +0200 Subject: [PATCH] [feature] Lucene index: make query parser configurable. --- .../exist/indexing/lucene/LuceneConfig.java | 46 ++++++++++++++++++- .../indexing/lucene/LuceneIndexWorker.java | 36 ++++++++++++--- .../test/src/xquery/lucene/analyzers.xql | 14 ++++++ 3 files changed, 87 insertions(+), 9 deletions(-) diff --git a/extensions/indexes/lucene/src/org/exist/indexing/lucene/LuceneConfig.java b/extensions/indexes/lucene/src/org/exist/indexing/lucene/LuceneConfig.java index 7370236ad12..33fca4df3d7 100644 --- a/extensions/indexes/lucene/src/org/exist/indexing/lucene/LuceneConfig.java +++ b/extensions/indexes/lucene/src/org/exist/indexing/lucene/LuceneConfig.java @@ -1,5 +1,7 @@ package org.exist.indexing.lucene; +import java.lang.reflect.Constructor; +import java.lang.reflect.InvocationTargetException; import java.util.ArrayList; import java.util.HashMap; import java.util.Iterator; @@ -11,6 +13,9 @@ import org.apache.log4j.Logger; import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.queryparser.classic.QueryParser; +import org.apache.lucene.queryparser.classic.QueryParserBase; +import org.apache.lucene.util.Version; import org.exist.dom.QName; import org.exist.indexing.lucene.analyzers.NoDiacriticsStandardAnalyzer; import org.exist.storage.NodePath; @@ -26,6 +31,7 @@ public class LuceneConfig { private final static String CONFIG_ROOT = "lucene"; private final static String INDEX_ELEMENT = "text"; private final static String ANALYZER_ELEMENT = "analyzer"; + private final static String PARSER_ELEMENT = "parser"; protected final static String FIELD_TYPE_ELEMENT = "fieldType"; private static final String INLINE_ELEMENT = "inline"; private static final String IGNORE_ELEMENT = "ignore"; @@ -47,6 +53,8 @@ public class LuceneConfig { private AnalyzerConfig analyzers = new AnalyzerConfig(); + private String queryParser = null; + public LuceneConfig(NodeList configNodes, Map namespaces) throws DatabaseConfigurationException { parseConfig(configNodes, namespaces); } @@ -148,7 +156,38 @@ public Analyzer getAnalyzer(String field) { public Analyzer getAnalyzerById(String id) { return analyzers.getAnalyzerById(id); } - + + public QueryParserBase getQueryParser(String field, Analyzer analyzer) { + QueryParserBase parser = null; + if (queryParser != null) { + try { + Class clazz = Class.forName(queryParser); + if (QueryParserBase.class.isAssignableFrom(clazz)) { + final Class cParamClasses[] = new Class[] { + Version.class, String.class, Analyzer.class + }; + final Constructor cstr = clazz.getDeclaredConstructor(cParamClasses); + parser = (QueryParserBase) cstr.newInstance(LuceneIndex.LUCENE_VERSION_IN_USE, field, analyzer); + } + } catch (ClassNotFoundException e) { + LOG.warn("Failed to instantiate lucene query parser class: " + queryParser, e); + } catch (NoSuchMethodException e) { + LOG.warn("Failed to instantiate lucene query parser class: " + queryParser + ": " + e.getMessage(), e); + } catch (InstantiationException e) { + LOG.warn("Failed to instantiate lucene query parser class: " + queryParser + ": " + e.getMessage(), e); + } catch (IllegalAccessException e) { + LOG.warn("Failed to instantiate lucene query parser class: " + queryParser, e); + } catch (InvocationTargetException e) { + LOG.warn("Failed to instantiate lucene query parser class: " + queryParser, e); + } + } + if (parser == null) { + // use default parser + parser = new QueryParser(LuceneIndex.LUCENE_VERSION_IN_USE, field, analyzer); + } + return parser; + } + public boolean isInlineNode(QName qname) { return inlineNodes != null && inlineNodes.contains(qname); } @@ -200,7 +239,10 @@ protected void parseConfig(NodeList configNodes, Map namespaces) } else if (ANALYZER_ELEMENT.equals(node.getLocalName())) { analyzers.addAnalyzer((Element) node); - + + } else if (PARSER_ELEMENT.equals(node.getLocalName())) { + queryParser = ((Element)node).getAttribute("class"); + } else if (FIELD_TYPE_ELEMENT.equals(node.getLocalName())) { FieldType type = new FieldType((Element) node, analyzers); fieldTypes.put(type.getId(), type); diff --git a/extensions/indexes/lucene/src/org/exist/indexing/lucene/LuceneIndexWorker.java b/extensions/indexes/lucene/src/org/exist/indexing/lucene/LuceneIndexWorker.java index 57e30335a06..a2d213a2332 100644 --- a/extensions/indexes/lucene/src/org/exist/indexing/lucene/LuceneIndexWorker.java +++ b/extensions/indexes/lucene/src/org/exist/indexing/lucene/LuceneIndexWorker.java @@ -31,6 +31,8 @@ import org.apache.lucene.index.*; import org.apache.lucene.queryparser.classic.ParseException; import org.apache.lucene.queryparser.classic.QueryParser; +import org.apache.lucene.queryparser.classic.QueryParserBase; +import org.apache.lucene.queryparser.flexible.standard.CommonQueryParserConfiguration; import org.apache.lucene.search.*; import org.apache.lucene.util.*; import org.exist.collections.Collection; @@ -387,7 +389,7 @@ public NodeSet query(XQueryContext context, int contextId, DocumentSet docs, Nod for (QName qname : qnames) { String field = LuceneUtil.encodeQName(qname, index.getBrokerPool().getSymbols()); Analyzer analyzer = getAnalyzer(null, qname, context.getBroker(), docs); - QueryParser parser = new QueryParser(LuceneIndex.LUCENE_VERSION_IN_USE, field, analyzer); + QueryParserBase parser = getQueryParser(field, analyzer, docs); setOptions(options, parser); Query query = parser.parse(queryStr); searchAndProcess(contextId, qname, docs, contextSet, resultSet, @@ -399,15 +401,15 @@ public NodeSet query(XQueryContext context, int contextId, DocumentSet docs, Nod return resultSet; } - protected void setOptions(Properties options, QueryParser parser) throws ParseException { + protected void setOptions(Properties options, CommonQueryParserConfiguration parser) throws ParseException { if (options == null) return; String option = options.getProperty(OPTION_DEFAULT_OPERATOR); - if (option != null) { + if (option != null && parser instanceof QueryParserBase) { if (DEFAULT_OPERATOR_OR.equals(option)) - parser.setDefaultOperator(QueryParser.OR_OPERATOR); + ((QueryParserBase)parser).setDefaultOperator(QueryParser.OR_OPERATOR); else - parser.setDefaultOperator(QueryParser.AND_OPERATOR); + ((QueryParserBase)parser).setDefaultOperator(QueryParser.AND_OPERATOR); } option = options.getProperty(OPTION_LEADING_WILDCARD); if (option != null) @@ -509,7 +511,7 @@ public NodeSet queryField(XQueryContext context, int contextId, DocumentSet docs searcher = index.getSearcher(); Analyzer analyzer = getAnalyzer(field, null, context.getBroker(), docs); LOG.debug("Using analyzer " + analyzer + " for " + queryString); - QueryParser parser = new QueryParser(LuceneIndex.LUCENE_VERSION_IN_USE, field, analyzer); + QueryParserBase parser = getQueryParser(field, analyzer, docs); setOptions(options, parser); Query query = parser.parse(queryString); searchAndProcess(contextId, null, docs, contextSet, resultSet, @@ -633,7 +635,7 @@ public NodeImpl search(final XQueryContext context, final List toBeMatch final Analyzer searchAnalyzer = new StandardAnalyzer(Version.LUCENE_43); // Setup query Version, default field, analyzer - final QueryParser parser = new QueryParser(Version.LUCENE_43, "", searchAnalyzer); + final QueryParserBase parser = getQueryParser("", searchAnalyzer, null); final Query query = parser.parse(queryText); // extract all used fields from query @@ -992,6 +994,26 @@ protected Analyzer getAnalyzer(String field, QName qname, DBBroker broker, Docum return index.getDefaultAnalyzer(); } + protected QueryParserBase getQueryParser(String field, Analyzer analyzer, DocumentSet docs) { + if (docs != null) { + for (Iterator i = docs.getCollectionIterator(); i.hasNext(); ) { + Collection collection = i.next(); + IndexSpec idxConf = collection.getIndexConfiguration(broker); + if (idxConf != null) { + LuceneConfig config = (LuceneConfig) idxConf.getCustomIndexSpec(LuceneIndex.ID); + if (config != null) { + QueryParserBase parser = config.getQueryParser(field, analyzer); + if (parser != null) { + return parser; + } + } + } + } + } + // not found. return default query parser: + return new QueryParser(LuceneIndex.LUCENE_VERSION_IN_USE, field, analyzer); + } + public boolean checkIndex(DBBroker broker) { return false; //To change body of implemented methods use File | Settings | File Templates. } diff --git a/extensions/indexes/lucene/test/src/xquery/lucene/analyzers.xql b/extensions/indexes/lucene/test/src/xquery/lucene/analyzers.xql index 9829fce6570..3e31bf6c0fe 100644 --- a/extensions/indexes/lucene/test/src/xquery/lucene/analyzers.xql +++ b/extensions/indexes/lucene/test/src/xquery/lucene/analyzers.xql @@ -9,6 +9,7 @@ declare variable $analyze:XCONF1 := + @@ -92,4 +93,17 @@ declare %test:assertEquals(1) function analyze:diacrictics($term as xs:string) { count(collection("/db/lucenetest/test2")//p[ft:query(., $term)]) +}; + +declare + %test:args("rüssels*") + %test:assertEquals(2) + %test:args("russels*") + %test:assertEquals(2) + %test:args("maor*") + %test:assertEquals(2) + %test:args("Māor*") + %test:assertEquals(2) +function analyze:query-parser($term as xs:string) { + count(collection("/db/lucenetest/test1")//p[ft:query(., $term)]) }; \ No newline at end of file