From b757303a205386730888e00d0622ee7b80bb1d3b Mon Sep 17 00:00:00 2001 From: Code Ferret Date: Wed, 5 Dec 2018 10:42:17 -0600 Subject: [PATCH] Merge dev NoIndex --- .../jena/query/text/EntityDefinition.java | 10 +++++ .../jena/query/text/TextIndexLucene.java | 41 ++++++++++--------- .../assembler/EntityDefinitionAssembler.java | 14 +++++++ .../jena/query/text/assembler/TextVocab.java | 1 + .../TestTextMultilingualEnhancements.java | 2 +- jena-text/src/test/resources/log4j.properties | 3 ++ 6 files changed, 50 insertions(+), 21 deletions(-) diff --git a/jena-text/src/main/java/org/apache/jena/query/text/EntityDefinition.java b/jena-text/src/main/java/org/apache/jena/query/text/EntityDefinition.java index 39457d83ee1..d3a2d86fc38 100644 --- a/jena-text/src/main/java/org/apache/jena/query/text/EntityDefinition.java +++ b/jena-text/src/main/java/org/apache/jena/query/text/EntityDefinition.java @@ -36,6 +36,7 @@ public class EntityDefinition { private final Map predicateToField = new HashMap<>() ; private final Map fieldToAnalyzer = new HashMap<>() ; private final ListMultimap fieldToPredicate = ArrayListMultimap.create() ; + private final Map fieldToNoIndex = new HashMap<>() ; private final Collection fields = Collections.unmodifiableCollection(fieldToPredicate.keys()) ; // private final Collection fields = // Collections.unmodifiableCollection(fieldToPredicate.keySet()) ; @@ -148,6 +149,15 @@ public void setAnalyzer(String field, Analyzer analyzer) { public Analyzer getAnalyzer(String field) { return fieldToAnalyzer.get(field); } + + public void setNoIndex(String field, boolean b) { + fieldToNoIndex.put(field, b); + } + + public boolean getNoIndex(String field) { + Boolean b = fieldToNoIndex.get(field); + return b != null ? 
b : false; + } public String getPrimaryField() { return primaryField ; diff --git a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java index 1ddce70c130..b217c8aa8b4 100644 --- a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java +++ b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java @@ -101,6 +101,7 @@ public class TextIndexLucene implements TextIndex { private final String queryParserType ; private final FieldType ftText ; private final FieldType ftTextNotStored ; // used for lang derived fields + private final FieldType ftTextStoredNoIndex ; // used for fields flagged noIndex (stored, not indexed) private final boolean isMultilingual ; private final boolean ignoreIndexErrors ; @@ -161,6 +162,10 @@ public TextIndexLucene(Directory directory, TextIndexConfig config) { this.ftText = config.isValueStored() ? TextField.TYPE_STORED : TextField.TYPE_NOT_STORED ; // the following is used for lang derived fields this.ftTextNotStored = TextField.TYPE_NOT_STORED ; + this.ftTextStoredNoIndex = new FieldType(); + this.ftTextStoredNoIndex.setIndexOptions(IndexOptions.NONE); + this.ftTextStoredNoIndex.setStored(true); + this.ftTextStoredNoIndex.freeze(); if (config.isValueStored() && docDef.getLangField() == null) log.warn("Values stored but langField not set. Returned values will not have language tag or datatype."); @@ -343,7 +348,10 @@ protected Document doc(Entity entity) { String uidField = docDef.getUidField() ; for ( Entry e : entity.getMap().entrySet() ) { - doc.add( new Field(e.getKey(), (String) e.getValue(), ftText) ); + String field = e.getKey(); + String value = (String) e.getValue(); + FieldType ft = (docDef.getNoIndex(field)) ? 
ftTextStoredNoIndex : ftText ; + doc.add( new Field(field, value, ft) ); if (langField != null) { String lang = entity.getLanguage(); RDFDatatype datatype = entity.getDatatype(); @@ -351,12 +359,12 @@ protected Document doc(Entity entity) { doc.add(new Field(langField, lang, StringField.TYPE_STORED)); if (this.isMultilingual) { // add a field that uses a language-specific analyzer via MultilingualAnalyzer - doc.add(new Field(e.getKey() + "_" + lang, (String) e.getValue(), ftTextNotStored)); + doc.add(new Field(field + "_" + lang, value, ftTextNotStored)); // add fields for any defined auxiliary indexes List auxIndexes = Util.getAuxIndexes(lang); if (auxIndexes != null) { for (String auxTag : auxIndexes) { - doc.add(new Field(e.getKey() + "_" + auxTag, (String) e.getValue(), ftTextNotStored)); + doc.add(new Field(field + "_" + auxTag, value, ftTextNotStored)); } } } @@ -366,7 +374,7 @@ protected Document doc(Entity entity) { } } if (uidField != null) { - String hash = entity.getChecksum(e.getKey(), (String) e.getValue()); + String hash = entity.getChecksum(field, value); doc.add(new Field(uidField, hash, StringField.TYPE_STORED)); } } @@ -630,32 +638,25 @@ private Analyzer getQueryAnalyzer(boolean usingSearchFor, String lang) { } else { if (this.isMultilingual && StringUtils.isNotEmpty(lang) && !lang.equals("none")) { textField += "_" + lang; - } - - if (docDef.getField(property) != null) { + textClause = textField + ":" + qs; + } else if (docDef.getField(property) != null) { textClause = textField + ":" + qs; } else { textClause = qs; } - - String langClause = null; - if (langField != null) { - langClause = StringUtils.isNotEmpty(lang) ? (!lang.equals("none") ? langField + ":" + lang : "-" + langField + ":*") : null; + + if (langField != null && StringUtils.isNotEmpty(lang)) { + textClause = "(" + textClause + ") AND " + (!lang.equals("none") ? 
langField + ":" + lang : "-" + langField + ":*"); } - if (langClause != null) - textClause = "(" + textClause + ") AND " + langClause ; } - String graphClause = null; - if (graphURI != null) { - String escaped = QueryParserBase.escape(graphURI) ; - graphClause = getDocDef().getGraphField() + ":" + escaped ; - } String queryString = textClause ; - if (graphClause != null) - queryString = "(" + queryString + ") AND " + graphClause ; + if (graphURI != null) { + String escaped = QueryParserBase.escape(graphURI) ; + queryString = "(" + queryString + ") AND " + getDocDef().getGraphField() + ":" + escaped ; + } Analyzer qa = getQueryAnalyzer(usingSearchFor, lang); Query query = parseQuery(queryString, qa) ; diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/EntityDefinitionAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/EntityDefinitionAssembler.java index 48bd94d6dcc..c66b0a8c297 100644 --- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/EntityDefinitionAssembler.java +++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/EntityDefinitionAssembler.java @@ -99,6 +99,7 @@ public EntityDefinition open(Assembler a, Resource root, Mode mode) Multimap mapDefs = HashMultimap.create() ; Map analyzerDefs = new HashMap<>(); + Map noIndexDefs = new HashMap<>(); Statement listStmt = root.getProperty(TextVocab.pMap); while (listStmt != null) { @@ -141,6 +142,16 @@ public EntityDefinition open(Assembler a, Resource root, Mode mode) } mapDefs.put(field, n.asNode()) ; + Statement noIndexStatement = listEntry.getProperty(TextVocab.pNoIndex); + if (noIndexStatement != null) { + n = noIndexStatement.getObject(); + if (! 
n.isLiteral()) { + throw new TextIndexException("Text map entry noIndex property must be a boolean : " + n); + } + boolean noInx = n.asLiteral().getBoolean(); + noIndexDefs.put(field, noInx) ; + } + Statement analyzerStatement = listEntry.getProperty(TextVocab.pAnalyzer); if (analyzerStatement != null) { n = analyzerStatement.getObject(); @@ -171,6 +182,9 @@ public EntityDefinition open(Assembler a, Resource root, Mode mode) for ( Node p : mapDefs.get(f)) docDef.set(f, p) ; } + for (String f : noIndexDefs.keySet()) { + docDef.setNoIndex(f, noIndexDefs.get(f)); + } for (String f : analyzerDefs.keySet()) { docDef.setAnalyzer(f, analyzerDefs.get(f)); } diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java index ee2bdb38cf5..76f5ac9fb27 100644 --- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java +++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java @@ -57,6 +57,7 @@ public class TextVocab public static final Property pMap = Vocab.property(NS, "map") ; public static final Property pField = Vocab.property(NS, "field") ; public static final Property pPredicate = Vocab.property(NS, "predicate") ; + public static final Property pNoIndex = Vocab.property(NS, "noIndex") ; public static final Property pOptional = Vocab.property(NS, "optional") ; // Analyzers diff --git a/jena-text/src/test/java/org/apache/jena/query/text/TestTextMultilingualEnhancements.java b/jena-text/src/test/java/org/apache/jena/query/text/TestTextMultilingualEnhancements.java index 5ca5a163d42..1ac5ffff7f7 100644 --- a/jena-text/src/test/java/org/apache/jena/query/text/TestTextMultilingualEnhancements.java +++ b/jena-text/src/test/java/org/apache/jena/query/text/TestTextMultilingualEnhancements.java @@ -129,7 +129,7 @@ public class TestTextMultilingualEnhancements extends AbstractTestDatasetWithTex " text:langField \"lang\" ;", " 
text:graphField \"graph\" ;", " text:map (", - " [ text:field \"label\" ; text:predicate rdfs:label ]", + " [ text:field \"label\" ; text:predicate rdfs:label ; text:noIndex true ]", " [ text:field \"comment\" ; text:predicate rdfs:comment ]", " ) ." ); diff --git a/jena-text/src/test/resources/log4j.properties b/jena-text/src/test/resources/log4j.properties index cf96ece1caa..188b1ca4a30 100644 --- a/jena-text/src/test/resources/log4j.properties +++ b/jena-text/src/test/resources/log4j.properties @@ -8,3 +8,6 @@ log4j.appender.stdlog.layout.ConversionPattern=%d{HH:mm:ss} %-5p %-25c{1} :: %m% # Execution logging log4j.logger.org.apache.jena.arq.info=INFO log4j.logger.org.apache.jena.arq.exec=INFO + +# XXX: uncomment to get the Lucene queries +# log4j.logger.org.apache.jena.query.text=TRACE