From 1440e81d75ee01baf874c407d5f0017bc59c6787 Mon Sep 17 00:00:00 2001 From: Chris Tomlinson Date: Mon, 17 Apr 2017 14:53:41 -0500 Subject: [PATCH 01/13] initial commit for generic analyzers --- .../assembler/GenericAnalyzerAssembler.java | 20 +++++++++++++++++++ .../query/text/assembler/TextAssembler.java | 1 + .../jena/query/text/assembler/TextVocab.java | 1 + 3 files changed, 22 insertions(+) create mode 100644 jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java new file mode 100644 index 00000000000..5c25cb2f234 --- /dev/null +++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java @@ -0,0 +1,20 @@ +package org.apache.jena.query.text.assembler; + +import org.apache.jena.assembler.Assembler; +import org.apache.jena.assembler.Mode; +import org.apache.jena.assembler.assemblers.AssemblerBase; +import org.apache.jena.rdf.model.Resource; + +public class GenericAnalyzerAssembler extends AssemblerBase { + + public GenericAnalyzerAssembler() { + // TODO Auto-generated constructor stub + } + + @Override + public Object open(Assembler a, Resource root, Mode mode) { + // TODO Auto-generated method stub + return null; + } + +} diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextAssembler.java index 80b2f7e3259..636c6bc3929 100644 --- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextAssembler.java +++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextAssembler.java @@ -37,6 +37,7 @@ public static void init() Assembler.general.implementWith(TextVocab.lowerCaseKeywordAnalyzer, new LowerCaseKeywordAnalyzerAssembler()) ; 
Assembler.general.implementWith(TextVocab.localizedAnalyzer, new LocalizedAnalyzerAssembler()) ; Assembler.general.implementWith(TextVocab.configurableAnalyzer, new ConfigurableAnalyzerAssembler()) ; + Assembler.general.implementWith(TextVocab.genericAnalyzer, new GenericAnalyzerAssembler()) ; } } diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java index 719d40469aa..bc49d1048b3 100644 --- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java +++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java @@ -66,6 +66,7 @@ public class TextVocab public static final Resource lowerCaseKeywordAnalyzer = Vocab.resource(NS, "LowerCaseKeywordAnalyzer"); public static final Resource localizedAnalyzer = Vocab.resource(NS, "LocalizedAnalyzer"); public static final Resource configurableAnalyzer = Vocab.resource(NS, "ConfigurableAnalyzer"); + public static final Resource genericAnalyzer = Vocab.resource(NS, "GenericAnalyzer"); // Tokenizers public static final Resource standardTokenizer = Vocab.resource(NS, "StandardTokenizer"); From 8b3757bae52d08d4b308bd0f996ff452c60cc7c9 Mon Sep 17 00:00:00 2001 From: Chris Tomlinson Date: Wed, 19 Apr 2017 14:43:04 -0500 Subject: [PATCH 02/13] initial documentation --- .../assembler/GenericAnalyzerAssembler.java | 85 +++++++++++++++++++ 1 file changed, 85 insertions(+) diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java index 5c25cb2f234..db707d2b242 100644 --- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java +++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java @@ -1,3 +1,21 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more 
contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package org.apache.jena.query.text.assembler; import org.apache.jena.assembler.Assembler; @@ -5,7 +23,74 @@ import org.apache.jena.assembler.assemblers.AssemblerBase; import org.apache.jena.rdf.model.Resource; +/** + * Creates generic analyzers given a fully qualified Class name and a list + * of parameters for a constructor of the Class. + *

+ * The parameters may be of the following types: + *

+ *     string    String
+ *     set       org.apache.lucene.analysis.CharArraySet
+ *     file      java.io.FileReader
+ *     int       int
+ *     boolean   boolean
+ * 
+ * + * Although the list of types is not exhaustive it is a simple matter + * to create a wrapper Analyzer that reads a file with information that can + * be used to initialize any sort of parameters that may be needed for + * a given Analyzer. The provided types cover the vast majority of cases. + *

+ * For example, org.apache.lucene.analysis.ja.JapaneseAnalyzer + * has a constructor with 4 parameters: a UserDict, + * a CharArraySet, a JapaneseTokenizer.Mode, and a + * Set<String>. So a simple wrapper can extract the values + * needed for the various parameters with types not available in this + * extension, construct the required instances, and instantiate the + * JapaneseAnalyzer. + *

+ * Adding custom Analyzers such as the above wrapper analyzer is a simple + * matter of adding the Analyzer class and any associated filters and tokenizer + * and so on to the classpath for Jena - usually in a jar. Of course, all of + * the Analyzers that are included in the Lucene distribution bundled with Jena + * are available as generic Analyzers as well. + *

+ * Each parameter object is specified with: + *

+ *

+ * A parameter of type set may have zero or more text:paramValues. + *

+ * A parameter of type string, file, boolean, or + * int must have a single text:paramValue + */ public class GenericAnalyzerAssembler extends AssemblerBase { + /* + text:map ( + [ text:field "text" ; + text:predicate rdfs:label; + text:analyzer [ + a text:GenericAnalyzer ; + text:class "org.apache.lucene.analysis.en.EnglishAnalyzer" ; + text:params [ + a rdf:seq ; + rdf:_1 [ + text:paramName "stopwords" ; + text:paramType "set" ; + text:paramValue "the", "a", "an" ] ; + rdf:_2 [ + text:paramName "stemExclusionSet" ; + text:paramType "set" ; + text:paramValue "ing", "ed" ] + ] + ] + ] . + */ public GenericAnalyzerAssembler() { // TODO Auto-generated constructor stub From 27ea30b73855d7a3cf0cd9561d2089295ec03353 Mon Sep 17 00:00:00 2001 From: Chris Tomlinson Date: Thu, 20 Apr 2017 15:37:00 -0500 Subject: [PATCH 03/13] implement GenericAnalyzerAssembler. TO DO: Tests --- .../assembler/GenericAnalyzerAssembler.java | 332 ++++++++++++++++-- .../query/text/assembler/TextAssembler.java | 2 +- .../jena/query/text/assembler/TextVocab.java | 8 +- 3 files changed, 318 insertions(+), 24 deletions(-) diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java index db707d2b242..7fb04cce4dc 100644 --- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java +++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java @@ -18,10 +18,24 @@ package org.apache.jena.query.text.assembler; +import java.io.Reader; +import java.lang.reflect.Constructor; +import java.lang.reflect.InvocationTargetException; +import java.util.ArrayList; +import java.util.List; + import org.apache.jena.assembler.Assembler; import org.apache.jena.assembler.Mode; import org.apache.jena.assembler.assemblers.AssemblerBase; +import org.apache.jena.atlas.logging.Log ; +import 
org.apache.jena.query.text.TextIndexException; +import org.apache.jena.rdf.model.Literal; +import org.apache.jena.rdf.model.RDFNode; import org.apache.jena.rdf.model.Resource; +import org.apache.jena.rdf.model.Statement; +import org.apache.jena.vocabulary.RDF; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.CharArraySet; /** * Creates generic analyzers given a fully qualified Class name and a list @@ -64,10 +78,29 @@ *

  • a text:paramValue which is an xsd:string, xsd:boolean or xsd:int.
  • * *

    - * A parameter of type set may have zero or more text:paramValues. + * A parameter of type set must have a list of zero or more Strings. *

    * A parameter of type string, file, boolean, or - * int must have a single text:paramValue + * int must have a single text:paramValue of the appropriate type. + *

    + * Example: + *

    +    text:map (
    +         [ text:field "text" ; 
    +           text:predicate rdfs:label;
    +           text:analyzer [
    +               a text:GenericAnalyzer ;
    +               text:class "org.apache.lucene.analysis.en.EnglishAnalyzer" ;
    +               text:params (
    +                    [ text:paramName "stopwords" ;
    +                      text:paramType "set" ;
    +                      text:paramValue ("the" "a" "an") ]
    +                    [ text:paramName "stemExclusionSet" ;
    +                      text:paramType "set" ;
    +                      text:paramValue ("ing" "ed") ]
    +                    )
    +           ] .
    + * 
    */ public class GenericAnalyzerAssembler extends AssemblerBase { /* @@ -77,29 +110,284 @@ public class GenericAnalyzerAssembler extends AssemblerBase { text:analyzer [ a text:GenericAnalyzer ; text:class "org.apache.lucene.analysis.en.EnglishAnalyzer" ; - text:params [ - a rdf:seq ; - rdf:_1 [ - text:paramName "stopwords" ; - text:paramType "set" ; - text:paramValue "the", "a", "an" ] ; - rdf:_2 [ - text:paramName "stemExclusionSet" ; - text:paramType "set" ; - text:paramValue "ing", "ed" ] - ] - ] - ] . + text:params ( + [ text:paramName "stopwords" ; + text:paramType "set" ; + text:paramValue ("the" "a" "an") ] + [ text:paramName "stemExclusionSet" ; + text:paramType "set" ; + text:paramValue ("ing" "ed") ] + ) + ] . */ - public GenericAnalyzerAssembler() { - // TODO Auto-generated constructor stub - } - @Override - public Object open(Assembler a, Resource root, Mode mode) { - // TODO Auto-generated method stub - return null; + public Analyzer open(Assembler a, Resource root, Mode mode) { + if (root.hasProperty(TextVocab.pClass)) { + // text:class is expected to be a string literal + String className = root.getProperty(TextVocab.pClass).getString(); + + // is the class accessible? + Class clazz = null; + try { + clazz = Class.forName(className); + } catch (ClassNotFoundException e) { + Log.error(this, "Analyzer class " + className + " not found. " + e.getMessage(), e); + return null; + } + + // Is the class an Analyzer? + if (!Analyzer.class.isAssignableFrom(clazz)) { + Log.error(this, clazz.getName() + " has to be a subclass of " + Analyzer.class.getName()); + return null; + } + + if (root.hasProperty(TextVocab.pParams)) { + RDFNode node = root.getProperty(TextVocab.pParams).getObject(); + if (! 
node.isResource()) { + throw new TextIndexException("text:params must be a list of parameter resources: " + node); + } + + List specs = getParamSpecs((Resource) node); + + // split the param specs into classes and values for constructor lookup + final Class paramClasses[] = new Class[specs.size()]; + final Object paramValues[] = new Object[specs.size()]; + for (int i = 0; i < specs.size(); i++) { + ParamSpec spec = specs.get(i); + paramClasses[i] = spec.getValueClass(); + paramValues[i] = spec.getValue(); + } + + // Create new analyzer + return newAnalyzer(clazz, paramClasses, paramValues); + + } else { + // use the nullary Analyzer constructor + return newAnalyzer(clazz, new Class[0], new Object[0]); + } + } else { + throw new TextIndexException("text:class property is required by GenericAnalyzer"); + } } + /** + * Create instance of the Lucene Analyzer, class, with provided parameters + * + * @param clazz The analyzer class + * @param paramClasses The parameter classes + * @param paramValues The parameter values + * @return The lucene analyzer + */ + private Analyzer newAnalyzer(Class clazz, Class[] paramClasses, Object[] paramValues) { + + String className = clazz.getName(); + + try { + final Constructor cstr = clazz.getDeclaredConstructor(paramClasses); + + return (Analyzer) cstr.newInstance(paramValues); + + } catch (IllegalArgumentException | IllegalAccessException | InstantiationException | InvocationTargetException | SecurityException e) { + Log.error(this, "Exception while instantiating analyzer class " + className + ". " + e.getMessage(), e); + } catch (NoSuchMethodException ex) { + Log.error(this, "Could not find matching analyzer class constructor for " + className + " " + ex.getMessage(), ex); + } + + return null; + } + + private List getParamSpecs(Resource list) { + List result = new ArrayList<>(); + Resource current = list; + + while (current != null && ! 
current.equals(RDF.nil)){ + Statement firstStmt = current.getProperty(RDF.first); + if (firstStmt == null) { + throw new TextIndexException("parameter list not well formed: " + current); + } + + RDFNode first = firstStmt.getObject(); + if (! first.isResource()) { + throw new TextIndexException("parameter specification must be an anon resource : " + first); + } + + result.add(getParamSpec((Resource) first)); + + Statement restStmt = current.getProperty(RDF.rest); + if (restStmt == null) { + throw new TextIndexException("parameter list not terminated by rdf:nil"); + } + + RDFNode rest = restStmt.getObject(); + if (! rest.isResource()) { + throw new TextIndexException("parameter list node is not a resource : " + rest); + } + + current = (Resource) rest; + } + + return result; + } + + private ParamSpec getParamSpec(Resource node) { + Statement nameStmt = node.getProperty(TextVocab.pParamName); + Statement typeStmt = node.getProperty(TextVocab.pParamType); + Statement valueStmt = node.getProperty(TextVocab.pParamValue); + + String name = getStringValue(nameStmt); + String type = getStringValue(typeStmt); + String value = getStringValue(valueStmt); + + switch (type) { + + // String + case "string": { + if (value == null) { + throw new TextIndexException("Value for string param: " + name + " must not be empty!"); + } + + return new ParamSpec(name, value, String.class); + } + + // "java.io.FileReader": + case "file": { + + if (value == null) { + throw new TextIndexException("Value for file param must exist and must contain a file name."); + } + + try { + // The analyzer is responsible for closing the file + Reader fileReader = new java.io.FileReader(value); + return new ParamSpec(name, fileReader, Reader.class); + + } catch (java.io.FileNotFoundException ex) { + throw new TextIndexException("File " + value + " for param " + name + " not found!"); + } + } + + // "org.apache.lucene.analysis.util.CharArraySet": + case "set": { + if (valueStmt == null) { + throw new 
TextIndexException("A set param spec must have a text:paramValue:" + node); + } + + RDFNode valueNode = valueStmt.getObject(); + if (!valueNode.isResource()) { + throw new TextIndexException("A set param spec text:paramValue must be a list of strings: " + valueNode); + } + + List values = toStrings((Resource) valueNode); + + return new ParamSpec(name, new CharArraySet(values, false), CharArraySet.class); + } + + // "int": + case "int": + if (value == null) { + throw new TextIndexException("Value for int param: " + name + " must not be empty!"); + } + + int n = ((Literal) valueStmt.getObject()).getInt(); + return new ParamSpec(name, n, int.class); + + // "boolean": + case "boolean": + if (value == null) { + throw new TextIndexException("Value for boolean param: " + name + " must not be empty!"); + } + + boolean b = ((Literal) valueStmt.getObject()).getBoolean(); + return new ParamSpec(name, b, boolean.class); + + default: + // there was no match + Log.error(this, "Unknown parameter type: " + type + " for param: " + name + " with value: " + value); + break; + } + + return null; + } + + private String getStringValue(Statement stmt) { + if (stmt == null) { + return null; + } else { + RDFNode node = stmt.getObject(); + if (node.isLiteral()) { + return ((Literal) node).getLexicalForm(); + } else { + return null; + } + } + } + + private List toStrings(Resource list) { + List result = new ArrayList<>(); + Resource current = list; + + while (current != null && ! current.equals(RDF.nil)){ + Statement firstStmt = current.getProperty(RDF.first); + if (firstStmt == null) { + throw new TextIndexException("param spec of type set not well formed"); + } + + RDFNode first = firstStmt.getObject(); + if (! 
first.isLiteral()) { + throw new TextIndexException("param spec of type set item is not a literal: " + first); + } + + result.add(((Literal)first).getLexicalForm()); + + Statement restStmt = current.getProperty(RDF.rest); + if (restStmt == null) { + throw new TextIndexException("param spec of type set not terminated by rdf:nil"); + } + + RDFNode rest = restStmt.getObject(); + if (! rest.isResource()) { + throw new TextIndexException("param spec of type set rest is not a resource: " + rest); + } + + current = (Resource) rest; + } + + return result; + } + + /** + * ParamSpec contains the name, Class, and + * value of a parameter for a constructor (or really any method in general) + */ + private static final class ParamSpec { + + private final String name; + private final Object value; + private final Class clazz; + + @SuppressWarnings("unused") + public ParamSpec(String key, Object value) { + this(key, value, value.getClass()); + } + + public ParamSpec(String key, Object value, Class clazz) { + this.name = key; + this.value = value; + this.clazz = clazz; + } + + @SuppressWarnings("unused") + public String getKey() { + return name; + } + + public Object getValue() { + return value; + } + + public Class getValueClass() { + return clazz; + } + } } diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextAssembler.java index 636c6bc3929..45f5cee7cec 100644 --- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextAssembler.java +++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextAssembler.java @@ -37,7 +37,7 @@ public static void init() Assembler.general.implementWith(TextVocab.lowerCaseKeywordAnalyzer, new LowerCaseKeywordAnalyzerAssembler()) ; Assembler.general.implementWith(TextVocab.localizedAnalyzer, new LocalizedAnalyzerAssembler()) ; Assembler.general.implementWith(TextVocab.configurableAnalyzer, new 
ConfigurableAnalyzerAssembler()) ; - Assembler.general.implementWith(TextVocab.genericAnalyzer, new GenericAnalyzerAssembler()) ; + Assembler.general.implementWith(TextVocab.genericAnalyzer, new GenericAnalyzerAssembler()) ; } } diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java index bc49d1048b3..cd1844d2cd3 100644 --- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java +++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java @@ -66,7 +66,6 @@ public class TextVocab public static final Resource lowerCaseKeywordAnalyzer = Vocab.resource(NS, "LowerCaseKeywordAnalyzer"); public static final Resource localizedAnalyzer = Vocab.resource(NS, "LocalizedAnalyzer"); public static final Resource configurableAnalyzer = Vocab.resource(NS, "ConfigurableAnalyzer"); - public static final Resource genericAnalyzer = Vocab.resource(NS, "GenericAnalyzer"); // Tokenizers public static final Resource standardTokenizer = Vocab.resource(NS, "StandardTokenizer"); @@ -87,5 +86,12 @@ public class TextVocab public static final Property pReplicas = Vocab.property(NS, "replicas"); public static final Property pIndexName = Vocab.property(NS, "indexName"); + //GenericAnalyzer + public static final Resource genericAnalyzer = Vocab.resource(NS, "GenericAnalyzer"); + public static final Property pClass = Vocab.property(NS, "class"); + public static final Property pParams = Vocab.property(NS, "params"); + public static final Property pParamName = Vocab.property(NS, "paramName"); + public static final Property pParamType = Vocab.property(NS, "paramType"); + public static final Property pParamValue = Vocab.property(NS, "paramValue"); } From 8f1fa7ccbf2cb05f2eed121831c39e07260ec18b Mon Sep 17 00:00:00 2001 From: Chris Tomlinson Date: Fri, 21 Apr 2017 16:02:20 -0500 Subject: [PATCH 04/13] adding GenericAnalyzer tests --- 
.../assembler/GenericAnalyzerAssembler.java | 2 +- .../org/apache/jena/query/text/TS_Text.java | 2 + .../TestGenericAnalyzerAssembler.java | 116 ++++++++++++++++++ 3 files changed, 119 insertions(+), 1 deletion(-) create mode 100644 jena-text/src/test/java/org/apache/jena/query/text/assembler/TestGenericAnalyzerAssembler.java diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java index 7fb04cce4dc..0fa706efcb7 100644 --- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java +++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java @@ -167,7 +167,7 @@ public Analyzer open(Assembler a, Resource root, Mode mode) { return newAnalyzer(clazz, new Class[0], new Object[0]); } } else { - throw new TextIndexException("text:class property is required by GenericAnalyzer"); + throw new TextIndexException("text:class property is required by GenericAnalyzer: " + root); } } diff --git a/jena-text/src/test/java/org/apache/jena/query/text/TS_Text.java b/jena-text/src/test/java/org/apache/jena/query/text/TS_Text.java index 7259b111f84..91663f22f86 100644 --- a/jena-text/src/test/java/org/apache/jena/query/text/TS_Text.java +++ b/jena-text/src/test/java/org/apache/jena/query/text/TS_Text.java @@ -21,6 +21,7 @@ import org.apache.jena.query.text.assembler.TestEntityMapAssembler; import org.apache.jena.query.text.assembler.TestTextDatasetAssembler; import org.apache.jena.query.text.assembler.TestTextIndexLuceneAssembler; +import org.apache.jena.query.text.assembler.TestGenericAnalyzerAssembler; import org.junit.runner.RunWith; import org.junit.runners.Suite; import org.junit.runners.Suite.SuiteClasses; @@ -47,6 +48,7 @@ , TestDatasetWithConfigurableAnalyzer.class , TestDatasetWithAnalyzingQueryParser.class , TestDatasetWithComplexPhraseQueryParser.class + , 
TestGenericAnalyzerAssembler.class }) public class TS_Text diff --git a/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestGenericAnalyzerAssembler.java b/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestGenericAnalyzerAssembler.java new file mode 100644 index 00000000000..e2a48938b2f --- /dev/null +++ b/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestGenericAnalyzerAssembler.java @@ -0,0 +1,116 @@ +package org.apache.jena.query.text.assembler; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import java.util.List; + +import org.apache.jena.assembler.Assembler; +import org.apache.jena.atlas.logging.Log; +import org.apache.jena.atlas.logging.LogCtl; +import org.apache.jena.query.text.EntityDefinition; +import org.apache.jena.query.text.TextIndexLucene; +import org.apache.jena.rdf.model.Model; +import org.apache.jena.rdf.model.ModelFactory; +import org.apache.jena.rdf.model.Property; +import org.apache.jena.rdf.model.RDFNode; +import org.apache.jena.rdf.model.Resource; +import org.apache.jena.vocabulary.RDF; +import org.apache.jena.vocabulary.RDFS; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.core.KeywordAnalyzer; +import org.apache.lucene.analysis.core.SimpleAnalyzer; +import org.apache.lucene.analysis.fr.FrenchAnalyzer; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.Test; + +public class TestGenericAnalyzerAssembler { + +// // Suppress warnings +// @BeforeClass public static void beforeClass() { LogCtl.setError(EntityDefinitionAssembler.class); } +// @AfterClass public static void afterClass() { LogCtl.setInfo(EntityDefinitionAssembler.class); } + + private static final String TESTBASE = "http://example.org/test/"; + private static final Resource spec1; + private static final Resource spec2; + private static final Resource spec3; + + @Test public void AnalyzerNullaryCtor() { + GenericAnalyzerAssembler 
gaAssem = new GenericAnalyzerAssembler(); + Analyzer analyzer = gaAssem.open(null, spec1, null); + assertEquals(SimpleAnalyzer.class, analyzer.getClass()); + } + + @Test public void AnalyzerNullaryCtor2() { + GenericAnalyzerAssembler gaAssem = new GenericAnalyzerAssembler(); + Analyzer analyzer = gaAssem.open(null, spec2, null); + assertEquals(FrenchAnalyzer.class, analyzer.getClass()); + } + + @Test public void AnalyzerCtorSet1() { + GenericAnalyzerAssembler gaAssem = new GenericAnalyzerAssembler(); + Analyzer analyzer = gaAssem.open(null, spec3, null); + assertEquals(FrenchAnalyzer.class, analyzer.getClass()); + } + + + private static final String CLASS_SIMPLE = "org.apache.lucene.analysis.core.SimpleAnalyzer"; + private static final String CLASS_FRENCH = "org.apache.lucene.analysis.fr.FrenchAnalyzer"; + + private static final String PARAM_TYPE_BOOL = "boolean"; + private static final String PARAM_TYPE_FILE = "file"; + private static final String PARAM_TYPE_INT = "int"; + private static final String PARAM_TYPE_SET = "set"; + private static final String PARAM_TYPE_STRING = "string"; + + static { + TextAssembler.init(); + Model model = ModelFactory.createDefaultModel(); + + // analyzer spec w/ no params + + spec1 = model.createResource() + .addProperty(RDF.type, TextVocab.genericAnalyzer) + .addProperty(TextVocab.pClass, CLASS_SIMPLE) + ; + + // analyzer spec w/ empty params + + spec2 = model.createResource() + .addProperty(RDF.type, TextVocab.genericAnalyzer) + .addProperty(TextVocab.pClass, CLASS_FRENCH) + .addProperty(TextVocab.pParams, + model.createList( + new RDFNode[] { } ) + ) + ; + + // analyzer spec w/ one set param + + spec3 = model.createResource() + .addProperty(RDF.type, TextVocab.genericAnalyzer) + .addProperty(TextVocab.pClass, CLASS_FRENCH) + .addProperty(TextVocab.pParams, + model.createList( + new RDFNode[] { + model.createResource() + .addProperty(TextVocab.pParamName, "stopWords") + .addProperty(TextVocab.pParamType, PARAM_TYPE_SET) + 
.addProperty(TextVocab.pParamValue, strs2list(model, "les le du")) + })) + ; + } + + private static Resource strs2list(Model model, String string) { + String[] members = string.split("\\s"); + Resource current = RDF.nil; + for (int i = members.length-1; i>=0; i--) { + Resource previous = current; + current = model.createResource(); + current.addProperty(RDF.rest, previous); + current.addProperty(RDF.first, members[i]); + } + return current; + } +} From d2f0561b99c957658261b3693e4a89892369a65a Mon Sep 17 00:00:00 2001 From: Chris Tomlinson Date: Sat, 22 Apr 2017 12:29:04 -0500 Subject: [PATCH 05/13] added parameters of type org.apache.lucene.analysis.Analyzer --- .../assembler/GenericAnalyzerAssembler.java | 61 +++++++++++++++---- 1 file changed, 50 insertions(+), 11 deletions(-) diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java index 0fa706efcb7..853fcb614d7 100644 --- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java +++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java @@ -48,6 +48,7 @@ * file java.io.FileReader * int int * boolean boolean + * analyzer org.apache.lucene.analysis.Analyzer * * * Although the list of types is not exhaustive it is a simple matter @@ -83,7 +84,7 @@ * A parameter of type string, file, boolean, or * int must have a single text:paramValue of the appropriate type. *

    - * Example: + * Examples: *

         text:map (
              [ text:field "text" ; 
    @@ -101,6 +102,23 @@
                         )
                ] .
      * 
    + *
    +    text:map (
    +         [ text:field "text" ; 
    +           text:predicate rdfs:label;
    +           text:analyzer [
    +               a text:GenericAnalyzer ;
    +               text:class "org.apache.lucene.analysis.shingle.ShingleAnalyzerWrapper" ;
    +               text:params (
    +                    [ text:paramName "defaultAnalyzer" ;
    +                      text:paramType "analyzer" ;
    +                      text:paramValue [ a text:SimpleAnalyzer ] ]
    +                    [ text:paramName "maxShingleSize" ;
    +                      text:paramType "int" ;
    +                      text:paramValue 3 ]
    +                    )
    +           ] .
    + * 
    */ public class GenericAnalyzerAssembler extends AssemblerBase { /* @@ -121,7 +139,14 @@ public class GenericAnalyzerAssembler extends AssemblerBase { ] . */ - @Override + public static final String TYPE_ANALYZER = "analyzer"; + public static final String TYPE_BOOL = "boolean"; + public static final String TYPE_FILE = "file"; + public static final String TYPE_INT = "int"; + public static final String TYPE_SET = "set"; + public static final String TYPE_STRING = "string"; + + @Override public Analyzer open(Assembler a, Resource root, Mode mode) { if (root.hasProperty(TextVocab.pClass)) { // text:class is expected to be a string literal @@ -242,7 +267,7 @@ private ParamSpec getParamSpec(Resource node) { switch (type) { // String - case "string": { + case TYPE_STRING: { if (value == null) { throw new TextIndexException("Value for string param: " + name + " must not be empty!"); } @@ -250,8 +275,8 @@ private ParamSpec getParamSpec(Resource node) { return new ParamSpec(name, value, String.class); } - // "java.io.FileReader": - case "file": { + // java.io.FileReader + case TYPE_FILE: { if (value == null) { throw new TextIndexException("Value for file param must exist and must contain a file name."); @@ -267,8 +292,8 @@ private ParamSpec getParamSpec(Resource node) { } } - // "org.apache.lucene.analysis.util.CharArraySet": - case "set": { + // org.apache.lucene.analysis.util.CharArraySet + case TYPE_SET: { if (valueStmt == null) { throw new TextIndexException("A set param spec must have a text:paramValue:" + node); } @@ -283,8 +308,8 @@ private ParamSpec getParamSpec(Resource node) { return new ParamSpec(name, new CharArraySet(values, false), CharArraySet.class); } - // "int": - case "int": + // int + case TYPE_INT: if (value == null) { throw new TextIndexException("Value for int param: " + name + " must not be empty!"); } @@ -292,8 +317,8 @@ private ParamSpec getParamSpec(Resource node) { int n = ((Literal) valueStmt.getObject()).getInt(); return new ParamSpec(name, 
n, int.class); - // "boolean": - case "boolean": + // boolean + case TYPE_BOOL: if (value == null) { throw new TextIndexException("Value for boolean param: " + name + " must not be empty!"); } @@ -301,6 +326,20 @@ private ParamSpec getParamSpec(Resource node) { boolean b = ((Literal) valueStmt.getObject()).getBoolean(); return new ParamSpec(name, b, boolean.class); + // org.apache.lucene.analysis.Analyzer + case TYPE_ANALYZER: + if (valueStmt == null) { + throw new TextIndexException("Analyzer param spec must have a text:paramValue:" + node); + } + + RDFNode valueNode = valueStmt.getObject(); + if (!valueNode.isResource()) { + throw new TextIndexException("Analyzer param spec text:paramValue must be an analyzer spec resource: " + valueNode); + } + + Analyzer analyzer = (Analyzer) Assembler.general.open((Resource) valueNode); + return new ParamSpec(name, analyzer, Analyzer.class); + default: // there was no match Log.error(this, "Unknown parameter type: " + type + " for param: " + name + " with value: " + value); From 94b41be7553a4f955c0e41c868d94662bdd7236e Mon Sep 17 00:00:00 2001 From: Chris Tomlinson Date: Sat, 22 Apr 2017 12:29:47 -0500 Subject: [PATCH 06/13] added more tests --- .../TestGenericAnalyzerAssembler.java | 121 ++++++++++++++++-- jena-text/testing/some-stop-words.txt | 6 + 2 files changed, 118 insertions(+), 9 deletions(-) create mode 100644 jena-text/testing/some-stop-words.txt diff --git a/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestGenericAnalyzerAssembler.java b/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestGenericAnalyzerAssembler.java index e2a48938b2f..2ddfa31e959 100644 --- a/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestGenericAnalyzerAssembler.java +++ b/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestGenericAnalyzerAssembler.java @@ -20,21 +20,22 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.core.KeywordAnalyzer; import 
org.apache.lucene.analysis.core.SimpleAnalyzer; +import org.apache.lucene.analysis.core.StopAnalyzer; import org.apache.lucene.analysis.fr.FrenchAnalyzer; +import org.apache.lucene.analysis.shingle.ShingleAnalyzerWrapper; import org.junit.AfterClass; import org.junit.BeforeClass; import org.junit.Test; public class TestGenericAnalyzerAssembler { - -// // Suppress warnings -// @BeforeClass public static void beforeClass() { LogCtl.setError(EntityDefinitionAssembler.class); } -// @AfterClass public static void afterClass() { LogCtl.setInfo(EntityDefinitionAssembler.class); } private static final String TESTBASE = "http://example.org/test/"; private static final Resource spec1; private static final Resource spec2; private static final Resource spec3; + private static final Resource spec4; + private static final Resource spec5; + private static final Resource spec6; @Test public void AnalyzerNullaryCtor() { GenericAnalyzerAssembler gaAssem = new GenericAnalyzerAssembler(); @@ -54,15 +55,38 @@ public class TestGenericAnalyzerAssembler { assertEquals(FrenchAnalyzer.class, analyzer.getClass()); } + @Test public void AnalyzerCtorAnalyzerInt() { + GenericAnalyzerAssembler gaAssem = new GenericAnalyzerAssembler(); + Analyzer analyzer = gaAssem.open(null, spec4, null); + assertEquals(ShingleAnalyzerWrapper.class, analyzer.getClass()); + } + + @Test public void AnalyzerCtorShingle7() { + GenericAnalyzerAssembler gaAssem = new GenericAnalyzerAssembler(); + Analyzer analyzer = gaAssem.open(null, spec5, null); + assertEquals(ShingleAnalyzerWrapper.class, analyzer.getClass()); + } + + @Test public void AnalyzerCtorFile() { + GenericAnalyzerAssembler gaAssem = new GenericAnalyzerAssembler(); + Analyzer analyzer = gaAssem.open(null, spec6, null); + assertEquals(StopAnalyzer.class, analyzer.getClass()); + } + private static final String CLASS_SIMPLE = "org.apache.lucene.analysis.core.SimpleAnalyzer"; private static final String CLASS_FRENCH = 
"org.apache.lucene.analysis.fr.FrenchAnalyzer"; + private static final String CLASS_SHINGLE = "org.apache.lucene.analysis.shingle.ShingleAnalyzerWrapper"; + private static final String CLASS_STOP = "org.apache.lucene.analysis.core.StopAnalyzer"; + + private static final String FILE_STOPS = "testing/some-stop-words.txt"; - private static final String PARAM_TYPE_BOOL = "boolean"; - private static final String PARAM_TYPE_FILE = "file"; - private static final String PARAM_TYPE_INT = "int"; - private static final String PARAM_TYPE_SET = "set"; - private static final String PARAM_TYPE_STRING = "string"; + private static final String PARAM_TYPE_ANALYZER = GenericAnalyzerAssembler.TYPE_ANALYZER; + private static final String PARAM_TYPE_BOOL = GenericAnalyzerAssembler.TYPE_BOOL; + private static final String PARAM_TYPE_FILE = GenericAnalyzerAssembler.TYPE_FILE; + private static final String PARAM_TYPE_INT = GenericAnalyzerAssembler.TYPE_INT; + private static final String PARAM_TYPE_SET = GenericAnalyzerAssembler.TYPE_SET; + private static final String PARAM_TYPE_STRING = GenericAnalyzerAssembler.TYPE_STRING; static { TextAssembler.init(); @@ -100,6 +124,85 @@ public class TestGenericAnalyzerAssembler { .addProperty(TextVocab.pParamValue, strs2list(model, "les le du")) })) ; + + // analyzer spec w/ analyzer param and int + + spec4 = model.createResource() + .addProperty(RDF.type, TextVocab.genericAnalyzer) + .addProperty(TextVocab.pClass, CLASS_SHINGLE) + .addProperty(TextVocab.pParams, + model.createList( + new RDFNode[] { + model.createResource() + .addProperty(TextVocab.pParamName, "defaultAnalyzer") + .addProperty(TextVocab.pParamType, PARAM_TYPE_ANALYZER) + .addProperty(TextVocab.pParamValue, + model.createResource() + .addProperty(RDF.type, TextVocab.simpleAnalyzer) + ), + model.createResource() + .addProperty(TextVocab.pParamName, "maxShingleSize") + .addProperty(TextVocab.pParamType, PARAM_TYPE_INT) + .addLiteral(TextVocab.pParamValue, 3) + })) + ; + + // analyzer 
spec w/ seven params of mixed types + + spec5 = model.createResource() + .addProperty(RDF.type, TextVocab.genericAnalyzer) + .addProperty(TextVocab.pClass, CLASS_SHINGLE) + .addProperty(TextVocab.pParams, + model.createList( + new RDFNode[] { + model.createResource() + .addProperty(TextVocab.pParamName, "delegate") + .addProperty(TextVocab.pParamType, PARAM_TYPE_ANALYZER) + .addProperty(TextVocab.pParamValue, + model.createResource() + .addProperty(RDF.type, TextVocab.simpleAnalyzer) + ) , + model.createResource() + .addProperty(TextVocab.pParamName, "minShingleSize") + .addProperty(TextVocab.pParamType, PARAM_TYPE_INT) + .addLiteral(TextVocab.pParamValue, 2) , + model.createResource() + .addProperty(TextVocab.pParamName, "maxShingleSize") + .addProperty(TextVocab.pParamType, PARAM_TYPE_INT) + .addLiteral(TextVocab.pParamValue, 4) , + model.createResource() + .addProperty(TextVocab.pParamName, "tokenSeparator") + .addProperty(TextVocab.pParamType, PARAM_TYPE_STRING) + .addLiteral(TextVocab.pParamValue, "|") , + model.createResource() + .addProperty(TextVocab.pParamName, "outputUnigrams") + .addProperty(TextVocab.pParamType, PARAM_TYPE_BOOL) + .addLiteral(TextVocab.pParamValue, false) , + model.createResource() + .addProperty(TextVocab.pParamName, "outputUnigramsIfNoShingles") + .addProperty(TextVocab.pParamType, PARAM_TYPE_BOOL) + .addLiteral(TextVocab.pParamValue, true) , + model.createResource() + .addProperty(TextVocab.pParamName, "fillerToken") + .addProperty(TextVocab.pParamType, PARAM_TYPE_STRING) + .addLiteral(TextVocab.pParamValue, "foo") + })) + ; + + // analyzer spec w/ one file param + + spec6 = model.createResource() + .addProperty(RDF.type, TextVocab.genericAnalyzer) + .addProperty(TextVocab.pClass, CLASS_STOP) + .addProperty(TextVocab.pParams, + model.createList( + new RDFNode[] { + model.createResource() + .addProperty(TextVocab.pParamName, "stopWords") + .addProperty(TextVocab.pParamType, PARAM_TYPE_FILE) + .addProperty(TextVocab.pParamValue, 
FILE_STOPS) + })) + ; } private static Resource strs2list(Model model, String string) { diff --git a/jena-text/testing/some-stop-words.txt b/jena-text/testing/some-stop-words.txt new file mode 100644 index 00000000000..e648d66f2bb --- /dev/null +++ b/jena-text/testing/some-stop-words.txt @@ -0,0 +1,6 @@ +foo +bar +baz +flip +flop +mop \ No newline at end of file From 57ded6a9c1f7d275de4f8e6294611a869407534d Mon Sep 17 00:00:00 2001 From: Chris Tomlinson Date: Sat, 22 Apr 2017 16:15:58 -0500 Subject: [PATCH 07/13] ignore: organize imports --- .../assembler/TestGenericAnalyzerAssembler.java | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestGenericAnalyzerAssembler.java b/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestGenericAnalyzerAssembler.java index 2ddfa31e959..87c5d75f161 100644 --- a/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestGenericAnalyzerAssembler.java +++ b/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestGenericAnalyzerAssembler.java @@ -1,35 +1,21 @@ package org.apache.jena.query.text.assembler; import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; -import java.util.List; - -import org.apache.jena.assembler.Assembler; -import org.apache.jena.atlas.logging.Log; -import org.apache.jena.atlas.logging.LogCtl; -import org.apache.jena.query.text.EntityDefinition; -import org.apache.jena.query.text.TextIndexLucene; import org.apache.jena.rdf.model.Model; import org.apache.jena.rdf.model.ModelFactory; -import org.apache.jena.rdf.model.Property; import org.apache.jena.rdf.model.RDFNode; import org.apache.jena.rdf.model.Resource; import org.apache.jena.vocabulary.RDF; -import org.apache.jena.vocabulary.RDFS; import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.core.KeywordAnalyzer; import org.apache.lucene.analysis.core.SimpleAnalyzer; import 
org.apache.lucene.analysis.core.StopAnalyzer; import org.apache.lucene.analysis.fr.FrenchAnalyzer; import org.apache.lucene.analysis.shingle.ShingleAnalyzerWrapper; -import org.junit.AfterClass; -import org.junit.BeforeClass; import org.junit.Test; public class TestGenericAnalyzerAssembler { - private static final String TESTBASE = "http://example.org/test/"; private static final Resource spec1; private static final Resource spec2; private static final Resource spec3; From a3bb8e41aeaf9be3540cf0a6be84cd9dc9b43b28 Mon Sep 17 00:00:00 2001 From: Chris Tomlinson Date: Sat, 22 Apr 2017 16:31:54 -0500 Subject: [PATCH 08/13] added analyzer definitions: 1) DefinedAnalyzers for use in text:map; 2) add analyzers to Multilingual support based on BCP47 codes --- .../apache/jena/query/text/analyzer/Util.java | 4 + .../assembler/DefinedAnalyzerAssembler.java | 100 ++++++++++++++++++ .../query/text/assembler/TextAssembler.java | 1 + .../assembler/TextIndexLuceneAssembler.java | 12 +++ .../jena/query/text/assembler/TextVocab.java | 5 + 5 files changed, 122 insertions(+) create mode 100644 jena-text/src/main/java/org/apache/jena/query/text/assembler/DefinedAnalyzerAssembler.java diff --git a/jena-text/src/main/java/org/apache/jena/query/text/analyzer/Util.java b/jena-text/src/main/java/org/apache/jena/query/text/analyzer/Util.java index fb2582a5b08..20c757388a4 100644 --- a/jena-text/src/main/java/org/apache/jena/query/text/analyzer/Util.java +++ b/jena-text/src/main/java/org/apache/jena/query/text/analyzer/Util.java @@ -51,6 +51,10 @@ public static Analyzer getLocalizedAnalyzer(String lang) { return null; } } + + public static void addAnalyzer(String lang, Analyzer analyzer) { + cache.put(lang, analyzer); + } private static void initAnalyzerDefs() { analyzersClasses = new Hashtable<>(); diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/DefinedAnalyzerAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/DefinedAnalyzerAssembler.java 
new file mode 100644 index 00000000000..e7bd941ff2c --- /dev/null +++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/DefinedAnalyzerAssembler.java @@ -0,0 +1,100 @@ +package org.apache.jena.query.text.assembler; + +import java.util.Hashtable; + +import org.apache.jena.assembler.Assembler; +import org.apache.jena.assembler.Mode; +import org.apache.jena.assembler.assemblers.AssemblerBase; +import org.apache.jena.query.text.TextIndexException; +import org.apache.jena.query.text.analyzer.Util; +import org.apache.jena.rdf.model.RDFNode; +import org.apache.jena.rdf.model.Resource; +import org.apache.jena.rdf.model.Statement; +import org.apache.jena.vocabulary.RDF; +import org.apache.lucene.analysis.Analyzer; + +public class DefinedAnalyzerAssembler extends AssemblerBase { + + private static Hashtable analyzers = new Hashtable<>(); + + public static void addAnalyzer(Resource key, Analyzer analyzer) { + analyzers.put(key, analyzer); + } + + public static boolean addAnalyzers(Assembler a, Resource list) { + Resource current = list; + boolean isMultilingualSupport = false; + + while (current != null && ! current.equals(RDF.nil)){ + Statement firstStmt = current.getProperty(RDF.first); + if (firstStmt == null) { + throw new TextIndexException("parameter list not well formed: " + current); + } + + RDFNode first = firstStmt.getObject(); + if (! 
first.isResource()) { + throw new TextIndexException("parameter specification must be an anon resource : " + first); + } + + // process the current list element to add an analyzer + Resource adding = (Resource) first; + if (adding.hasProperty(TextVocab.pAnalyzer)) { + Statement analyzerStmt = adding.getProperty(TextVocab.pAnalyzer); + RDFNode analyzerNode = analyzerStmt.getObject(); + if (!analyzerNode.isResource()) { + throw new TextIndexException("addAnalyzers text:analyzer must be an analyzer spec resource: " + analyzerNode); + } + + Analyzer analyzer = (Analyzer) a.open((Resource) analyzerNode); + + if (adding.hasProperty(TextVocab.pAddLang)) { + Statement langStmt = adding.getProperty(TextVocab.pAddLang); + String langCode = langStmt.getString(); + Util.addAnalyzer(langCode, analyzer); + isMultilingualSupport = true; + } + + if (adding.hasProperty(TextVocab.pDefAnalyzer)) { + Statement defStmt = adding.getProperty(TextVocab.pDefAnalyzer); + Resource id = defStmt.getResource(); + + if (id.getURI() != null) { + DefinedAnalyzerAssembler.addAnalyzer(id, analyzer); + } else { + throw new TextIndexException("addAnalyzers text:defineAnalyzer property must be a non-blank resource: " + adding); + } + } + } else { + throw new TextIndexException("text:analyzer property is required when adding an analyzer: " + adding); + } + + Statement restStmt = current.getProperty(RDF.rest); + if (restStmt == null) { + throw new TextIndexException("parameter list not terminated by rdf:nil"); + } + + RDFNode rest = restStmt.getObject(); + if (! 
rest.isResource()) { + throw new TextIndexException("parameter list node is not a resource : " + rest); + } + + current = (Resource) rest; + } + + return isMultilingualSupport; + } + + @Override + public Object open(Assembler a, Resource root, Mode mode) { + + if (root.hasProperty(TextVocab.pUseAnalyzer)) { + Statement useStmt = root.getProperty(TextVocab.pUseAnalyzer); + Resource key = useStmt.getResource(); + + return analyzers.get(key); + } + + return null; + } + +} diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextAssembler.java index 45f5cee7cec..6cbb2dae00e 100644 --- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextAssembler.java +++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextAssembler.java @@ -38,6 +38,7 @@ public static void init() Assembler.general.implementWith(TextVocab.localizedAnalyzer, new LocalizedAnalyzerAssembler()) ; Assembler.general.implementWith(TextVocab.configurableAnalyzer, new ConfigurableAnalyzerAssembler()) ; Assembler.general.implementWith(TextVocab.genericAnalyzer, new GenericAnalyzerAssembler()) ; + Assembler.general.implementWith(TextVocab.definedAnalyzer, new DefinedAnalyzerAssembler()) ; } } diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java index 0ec1e5b8350..7acfb9eb47c 100644 --- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java +++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java @@ -115,6 +115,18 @@ public TextIndex open(Assembler a, Resource root, Mode mode) { } isMultilingualSupport = mlsNode.asLiteral().getBoolean(); } + + Statement defAnalyzersStatement = root.getProperty(pDefAnalyzers); + if (null != defAnalyzersStatement) { + RDFNode aNode = 
defAnalyzersStatement.getObject(); + if (! aNode.isResource()) { + throw new TextIndexException("text:defineAnalyzers property is not a resource : " + aNode); + } + boolean addedLangs = DefinedAnalyzerAssembler.addAnalyzers(a, (Resource) aNode); + if (addedLangs) { + isMultilingualSupport = true; + } + } boolean storeValues = false; Statement storeValuesStatement = root.getProperty(pStoreValues); diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java index cd1844d2cd3..b051252db95 100644 --- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java +++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java @@ -88,10 +88,15 @@ public class TextVocab //GenericAnalyzer public static final Resource genericAnalyzer = Vocab.resource(NS, "GenericAnalyzer"); + public static final Resource definedAnalyzer = Vocab.resource(NS, "DefinedAnalyzer"); public static final Property pClass = Vocab.property(NS, "class"); public static final Property pParams = Vocab.property(NS, "params"); public static final Property pParamName = Vocab.property(NS, "paramName"); public static final Property pParamType = Vocab.property(NS, "paramType"); public static final Property pParamValue = Vocab.property(NS, "paramValue"); + public static final Property pDefAnalyzers = Vocab.property(NS, "defineAnalyzers"); + public static final Property pDefAnalyzer = Vocab.property(NS, "defineAnalyzer"); + public static final Property pAddLang = Vocab.property(NS, "addLang"); + public static final Property pUseAnalyzer = Vocab.property(NS, "useAnalyzer"); } From 311efab2fd26a58406b29b64d74b41039292d080 Mon Sep 17 00:00:00 2001 From: Chris Tomlinson Date: Sun, 23 Apr 2017 09:18:35 -0500 Subject: [PATCH 09/13] represent parameter types as resources like text:TypeSet instead of literal string --- .../assembler/GenericAnalyzerAssembler.java | 208 
+++++++++--------- .../jena/query/text/assembler/TextVocab.java | 6 + .../TestGenericAnalyzerAssembler.java | 29 +-- 3 files changed, 125 insertions(+), 118 deletions(-) diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java index 853fcb614d7..4f10b85e5bf 100644 --- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java +++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java @@ -43,12 +43,12 @@ *

    * The parameters may be of the following types: *

    - *     string    String
    - *     set       org.apache.lucene.analysis.util.CharArraySet
    - *     file      java.io.FileReader
    - *     int       int
    - *     boolean   boolean
    - *     analyzer  org.apache.lucene.analysis.Analyzer
    + *     text:TypeString    String
    + *     text:TypeSet       org.apache.lucene.analysis.util.CharArraySet
    + *     text:TypeFile      java.io.FileReader
    + *     text:TypeInt       int
    + *     text:TypeBoolean   boolean
    + *     text:TypeAnalyzer  org.apache.lucene.analysis.Analyzer
      * 
    * * Although the list of types is not exhaustive it is a simple matter @@ -74,15 +74,18 @@ *
      *
    • an optional text:paramName that may be used to document which * parameter is represented
    • - *
    • a text:paramType which is one of: string, - * set, file, int, boolean.
    • - *
    • a text:paramValue which is an xsd:string, xsd:boolean or xsd:int.
    • + *
    • a text:paramType which is one of: text:TypeString, + * text:TypeSet, text:TypeFile, text:TypeInt, + * text:TypeBoolean, text:TypeAnalyzer.
    • + *
    • a text:paramValue which is an xsd:string, xsd:boolean or xsd:int or resource.
    • *
    *

    - * A parameter of type set must have a list of zero or more Strings. + * A parameter of type text:TypeSet must have a list of zero or + * more Strings. *

    - * A parameter of type string, file, boolean, or - * int must have a single text:paramValue of the appropriate type. + * A parameter of type text:TypeString, text:TypeFile, + * text:TypeBoolean, text:TypeInt or text:TypeAnalyzer + * must have a single text:paramValue of the appropriate type. *

    * Examples: *

    @@ -94,10 +97,10 @@
                    text:class "org.apache.lucene.analysis.en.EnglishAnalyzer" ;
                    text:params (
                         [ text:paramName "stopwords" ;
    -                      text:paramType "set" ;
    +                      text:paramType text:TypeSet ;
                           text:paramValue ("the" "a" "an") ]
                         [ text:paramName "stemExclusionSet" ;
    -                      text:paramType "set" ;
    +                      text:paramType text:TypeSet ;
                           text:paramValue ("ing" "ed") ]
                         )
                ] .
    @@ -111,10 +114,10 @@
                    text:class "org.apache.lucene.analysis.shingle.ShingleAnalyzerWrapper" ;
                    text:params (
                         [ text:paramName "defaultAnalyzer" ;
    -                      text:paramType "analyzer" ;
    +                      text:paramType text:TypeAnalyzer ;
                           text:paramValue [ a text:SimpleAnalyzer ] ]
                         [ text:paramName "maxShingleSize" ;
    -                      text:paramType "int" ;
    +                      text:paramType text:TypeInt ;
                           text:paramValue 3 ]
                         )
                ] .
    @@ -130,71 +133,71 @@ public class GenericAnalyzerAssembler extends AssemblerBase {
                    text:class "org.apache.lucene.analysis.en.EnglishAnalyzer" ;
                    text:params (
                         [ text:paramName "stopwords" ;
    -                      text:paramType "set" ;
    +                      text:paramType text:TypeSet ;
                           text:paramValue ("the" "a" "an") ]
                         [ text:paramName "stemExclusionSet" ;
    -                      text:paramType "set" ;
    +                      text:paramType text:TypeSet ;
                           text:paramValue ("ing" "ed") ]
                         )
                ] .
          */
     
    -    public static final String TYPE_ANALYZER = "analyzer";
    -    public static final String TYPE_BOOL = "boolean";
    -    public static final String TYPE_FILE = "file";
    -    public static final String TYPE_INT = "int";
    -    public static final String TYPE_SET = "set";
    -    public static final String TYPE_STRING = "string";
    +    public static final String TYPE_ANALYZER   = "TypeAnalyzer";
    +    public static final String TYPE_BOOL       = "TypeBoolean";
    +    public static final String TYPE_FILE       = "TypeFile";
    +    public static final String TYPE_INT        = "TypeInt";
    +    public static final String TYPE_SET        = "TypeSet";
    +    public static final String TYPE_STRING     = "TypeString";
     
         @Override
    -	public Analyzer open(Assembler a, Resource root, Mode mode) {
    -	    if (root.hasProperty(TextVocab.pClass)) {
    -	        // text:class is expected to be a string literal
    -	        String className = root.getProperty(TextVocab.pClass).getString();
    -
    -	        // is the class accessible?
    -	        Class clazz = null;
    -	        try {
    -	            clazz = Class.forName(className);
    -	        } catch (ClassNotFoundException e) {
    -	            Log.error(this, "Analyzer class " + className + " not found. " + e.getMessage(), e);
    -	            return null;
    -	        }
    -
    -	        // Is the class an Analyzer?
    -	        if (!Analyzer.class.isAssignableFrom(clazz)) {
    -	            Log.error(this, clazz.getName() + " has to be a subclass of " + Analyzer.class.getName());
    -	            return null;
    -	        }
    -	        
    -	        if (root.hasProperty(TextVocab.pParams)) {
    -	            RDFNode node = root.getProperty(TextVocab.pParams).getObject();
    -	            if (! node.isResource()) {
    -	                throw new TextIndexException("text:params must be a list of parameter resources: " + node);
    -	            }
    -
    -	            List specs = getParamSpecs((Resource) node);
    -
    -	            // split the param specs into classes and values for constructor lookup
    -	            final Class paramClasses[] = new Class[specs.size()];
    -	            final Object paramValues[] = new Object[specs.size()];
    -	            for (int i = 0; i < specs.size(); i++) {
    -	                ParamSpec spec = specs.get(i);
    -	                paramClasses[i] = spec.getValueClass();
    -	                paramValues[i] = spec.getValue();
    -	            }
    -
    -	            // Create new analyzer
    -	            return newAnalyzer(clazz, paramClasses, paramValues);
    -
    -	        } else {
    -	            // use the nullary Analyzer constructor
    -	            return newAnalyzer(clazz, new Class[0], new Object[0]);
    -	        }
    -	    } else {
    -	        throw new TextIndexException("text:class property is required by GenericAnalyzer: " + root);
    -	    }
    -	}
    +    public Analyzer open(Assembler a, Resource root, Mode mode) {
    +        if (root.hasProperty(TextVocab.pClass)) {
    +            // text:class is expected to be a string literal
    +            String className = root.getProperty(TextVocab.pClass).getString();
    +
    +            // is the class accessible?
    +            Class clazz = null;
    +            try {
    +                clazz = Class.forName(className);
    +            } catch (ClassNotFoundException e) {
    +                Log.error(this, "Analyzer class " + className + " not found. " + e.getMessage(), e);
    +                return null;
    +            }
    +
    +            // Is the class an Analyzer?
    +            if (!Analyzer.class.isAssignableFrom(clazz)) {
    +                Log.error(this, clazz.getName() + " has to be a subclass of " + Analyzer.class.getName());
    +                return null;
    +            }
    +
    +            if (root.hasProperty(TextVocab.pParams)) {
    +                RDFNode node = root.getProperty(TextVocab.pParams).getObject();
    +                if (! node.isResource()) {
    +                    throw new TextIndexException("text:params must be a list of parameter resources: " + node);
    +                }
    +
    +                List specs = getParamSpecs((Resource) node);
    +
    +                // split the param specs into classes and values for constructor lookup
    +                final Class paramClasses[] = new Class[specs.size()];
    +                final Object paramValues[] = new Object[specs.size()];
    +                for (int i = 0; i < specs.size(); i++) {
    +                    ParamSpec spec = specs.get(i);
    +                    paramClasses[i] = spec.getValueClass();
    +                    paramValues[i] = spec.getValue();
    +                }
    +
    +                // Create new analyzer
    +                return newAnalyzer(clazz, paramClasses, paramValues);
    +
    +            } else {
    +                // use the nullary Analyzer constructor
    +                return newAnalyzer(clazz, new Class[0], new Object[0]);
    +            }
    +        } else {
    +            throw new TextIndexException("text:class property is required by GenericAnalyzer: " + root);
    +        }
    +    }
     
         /**
          * Create instance of the Lucene Analyzer, class, with provided parameters
    @@ -221,47 +224,52 @@ private Analyzer newAnalyzer(Class clazz, Class[] paramClasses, Object[] p
     
             return null;
         }
    -    
    +
         private List getParamSpecs(Resource list) {
             List result = new ArrayList<>();
             Resource current = list;
    -        
    +
             while (current != null && ! current.equals(RDF.nil)){
                 Statement firstStmt = current.getProperty(RDF.first);
                 if (firstStmt == null) {
                     throw new TextIndexException("parameter list not well formed: " + current);
                 }
    -            
    +
                 RDFNode first = firstStmt.getObject();
                 if (! first.isResource()) {
                     throw new TextIndexException("parameter specification must be an anon resource : " + first);
                 }
     
                 result.add(getParamSpec((Resource) first));
    -            
    +
                 Statement restStmt = current.getProperty(RDF.rest);
                 if (restStmt == null) {
                     throw new TextIndexException("parameter list not terminated by rdf:nil");
                 }
    -            
    +
                 RDFNode rest = restStmt.getObject();
                 if (! rest.isResource()) {
                     throw new TextIndexException("parameter list node is not a resource : " + rest);
                 }
    -            
    +
                 current = (Resource) rest;
             }
    -        
    +
             return result;
         }
    -    
    +
         private ParamSpec getParamSpec(Resource node) {
             Statement nameStmt = node.getProperty(TextVocab.pParamName);
             Statement typeStmt = node.getProperty(TextVocab.pParamType);
             Statement valueStmt = node.getProperty(TextVocab.pParamValue);
             
    +        if (typeStmt == null) {
    +            throw new TextIndexException("Parameter specification must have a text:paramType: " + node);
    +        }        
    +        Resource typeRes = typeStmt.getResource();
    +        String type = typeRes.getLocalName();
    +
             String name = getStringValue(nameStmt);
    -        String type = getStringValue(typeStmt);
             String value = getStringValue(valueStmt);
     
             switch (type) {
    @@ -274,7 +282,7 @@ private ParamSpec getParamSpec(Resource node) {
     
                 return new ParamSpec(name, value, String.class);
             }
    -        
    +
             // java.io.FileReader
             case TYPE_FILE: {
     
    @@ -291,23 +299,23 @@ private ParamSpec getParamSpec(Resource node) {
                     throw new TextIndexException("File " + value + " for param " + name + " not found!");
                 }
             }
    -        
    +
             // org.apache.lucene.analysis.util.CharArraySet
             case TYPE_SET: {
                 if (valueStmt == null) {
                     throw new TextIndexException("A set param spec must have a text:paramValue:" + node);
                 }
    -            
    +
                 RDFNode valueNode = valueStmt.getObject();
                 if (!valueNode.isResource()) {
                     throw new TextIndexException("A set param spec text:paramValue must be a list of strings: " + valueNode);
                 }
    -            
    +
                 List values = toStrings((Resource) valueNode);
     
                 return new ParamSpec(name, new CharArraySet(values, false), CharArraySet.class);
             }
    -        
    +
             // int
             case TYPE_INT:
                 if (value == null) {
    @@ -317,7 +325,7 @@ private ParamSpec getParamSpec(Resource node) {
                 int n = ((Literal) valueStmt.getObject()).getInt();
                 return new ParamSpec(name, n, int.class);
     
    -        // boolean
    +            // boolean
             case TYPE_BOOL:
                 if (value == null) {
                     throw new TextIndexException("Value for boolean param: " + name + " must not be empty!");
    @@ -325,21 +333,21 @@ private ParamSpec getParamSpec(Resource node) {
     
                 boolean b = ((Literal) valueStmt.getObject()).getBoolean();
                 return new ParamSpec(name, b, boolean.class);
    -        
    -        // org.apache.lucene.analysis.Analyzer
    +
    +            // org.apache.lucene.analysis.Analyzer
             case TYPE_ANALYZER:
                 if (valueStmt == null) {
                     throw new TextIndexException("Analyzer param spec must have a text:paramValue:" + node);
                 }
    -            
    +
                 RDFNode valueNode = valueStmt.getObject();
                 if (!valueNode.isResource()) {
                     throw new TextIndexException("Analyzer param spec text:paramValue must be an analyzer spec resource: " + valueNode);
                 }
    -            
    +
                 Analyzer analyzer = (Analyzer) Assembler.general.open((Resource) valueNode);
                 return new ParamSpec(name, analyzer, Analyzer.class);
    -        
    +
             default:
                 // there was no match
                 Log.error(this, "Unknown parameter type: " + type + " for param: " + name + " with value: " + value);
    @@ -348,7 +356,7 @@ private ParamSpec getParamSpec(Resource node) {
     
             return null;
         }
    -    
    +
         private String getStringValue(Statement stmt) {
             if (stmt == null) {
                 return null;
    @@ -365,33 +373,33 @@ private String getStringValue(Statement stmt) {
         private List toStrings(Resource list) {
             List result = new ArrayList<>();
             Resource current = list;
    -        
    +
             while (current != null && ! current.equals(RDF.nil)){
                 Statement firstStmt = current.getProperty(RDF.first);
                 if (firstStmt == null) {
                     throw new TextIndexException("param spec of type set not well formed");
                 }
    -            
    +
                 RDFNode first = firstStmt.getObject();
                 if (! first.isLiteral()) {
                     throw new TextIndexException("param spec of type set item is not a literal: " + first);
                 }
    -            
    +
                 result.add(((Literal)first).getLexicalForm());
    -            
    +
                 Statement restStmt = current.getProperty(RDF.rest);
                 if (restStmt == null) {
                     throw new TextIndexException("param spec of type set not terminated by rdf:nil");
                 }
    -            
    +
                 RDFNode rest = restStmt.getObject();
                 if (! rest.isResource()) {
                     throw new TextIndexException("param spec of type set rest is not a resource: " + rest);
                 }
    -            
    +
                 current = (Resource) rest;
             }
    -        
    +
             return result;
         }
     
    diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
    index b051252db95..78cf0c0ad3d 100644
    --- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
    +++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
    @@ -89,6 +89,12 @@ public class TextVocab
         //GenericAnalyzer
         public static final Resource genericAnalyzer    = Vocab.resource(NS, "GenericAnalyzer");
         public static final Resource definedAnalyzer    = Vocab.resource(NS, "DefinedAnalyzer");
    +    public static final Resource typeAnalyzer       = Vocab.resource(NS, GenericAnalyzerAssembler.TYPE_ANALYZER);
    +    public static final Resource typeBoolean        = Vocab.resource(NS, GenericAnalyzerAssembler.TYPE_BOOL);
    +    public static final Resource typeFile           = Vocab.resource(NS, GenericAnalyzerAssembler.TYPE_FILE);
    +    public static final Resource typeInt            = Vocab.resource(NS, GenericAnalyzerAssembler.TYPE_INT);
    +    public static final Resource typeSet            = Vocab.resource(NS, GenericAnalyzerAssembler.TYPE_SET);
    +    public static final Resource typeString         = Vocab.resource(NS, GenericAnalyzerAssembler.TYPE_STRING);
         public static final Property pClass             = Vocab.property(NS, "class");
         public static final Property pParams            = Vocab.property(NS, "params");
         public static final Property pParamName         = Vocab.property(NS, "paramName");
    diff --git a/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestGenericAnalyzerAssembler.java b/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestGenericAnalyzerAssembler.java
    index 87c5d75f161..3effc39bb01 100644
    --- a/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestGenericAnalyzerAssembler.java
    +++ b/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestGenericAnalyzerAssembler.java
    @@ -67,13 +67,6 @@ public class TestGenericAnalyzerAssembler {
         
         private static final String FILE_STOPS = "testing/some-stop-words.txt";
         
    -    private static final String PARAM_TYPE_ANALYZER = GenericAnalyzerAssembler.TYPE_ANALYZER;
    -    private static final String PARAM_TYPE_BOOL = GenericAnalyzerAssembler.TYPE_BOOL;
    -    private static final String PARAM_TYPE_FILE = GenericAnalyzerAssembler.TYPE_FILE;
    -    private static final String PARAM_TYPE_INT = GenericAnalyzerAssembler.TYPE_INT;
    -    private static final String PARAM_TYPE_SET = GenericAnalyzerAssembler.TYPE_SET;
    -    private static final String PARAM_TYPE_STRING = GenericAnalyzerAssembler.TYPE_STRING;
    -    
         static {
             TextAssembler.init();
             Model model = ModelFactory.createDefaultModel();
    @@ -106,7 +99,7 @@ public class TestGenericAnalyzerAssembler {
                                               new RDFNode[] { 
                                                       model.createResource()
                                                       .addProperty(TextVocab.pParamName, "stopWords")
    -                                                  .addProperty(TextVocab.pParamType, PARAM_TYPE_SET)
    +                                                  .addProperty(TextVocab.pParamType, TextVocab.typeSet)
                                                       .addProperty(TextVocab.pParamValue, strs2list(model, "les le du"))
                                               }))
                          ;
    @@ -121,14 +114,14 @@ public class TestGenericAnalyzerAssembler {
                                               new RDFNode[] { 
                                                       model.createResource()
                                                       .addProperty(TextVocab.pParamName, "defaultAnalyzer")
    -                                                  .addProperty(TextVocab.pParamType, PARAM_TYPE_ANALYZER)
    +                                                  .addProperty(TextVocab.pParamType, TextVocab.typeAnalyzer)
                                                       .addProperty(TextVocab.pParamValue, 
                                                                    model.createResource()
                                                                    .addProperty(RDF.type, TextVocab.simpleAnalyzer)
                                                                    ),
                                                       model.createResource()
                                                       .addProperty(TextVocab.pParamName, "maxShingleSize")
    -                                                  .addProperty(TextVocab.pParamType, PARAM_TYPE_INT)
    +                                                  .addProperty(TextVocab.pParamType, TextVocab.typeInt)
                                                       .addLiteral(TextVocab.pParamValue, 3)
                                               }))
                          ;
    @@ -143,34 +136,34 @@ public class TestGenericAnalyzerAssembler {
                                               new RDFNode[] { 
                                                       model.createResource()
                                                       .addProperty(TextVocab.pParamName, "delegate")
    -                                                  .addProperty(TextVocab.pParamType, PARAM_TYPE_ANALYZER)
    +                                                  .addProperty(TextVocab.pParamType, TextVocab.typeAnalyzer)
                                                       .addProperty(TextVocab.pParamValue, 
                                                                    model.createResource()
                                                                    .addProperty(RDF.type, TextVocab.simpleAnalyzer)
                                                                    ) ,
                                                       model.createResource()
                                                       .addProperty(TextVocab.pParamName, "minShingleSize")
    -                                                  .addProperty(TextVocab.pParamType, PARAM_TYPE_INT)
    +                                                  .addProperty(TextVocab.pParamType, TextVocab.typeInt)
                                                       .addLiteral(TextVocab.pParamValue, 2) ,
                                                       model.createResource()
                                                       .addProperty(TextVocab.pParamName, "maxShingleSize")
    -                                                  .addProperty(TextVocab.pParamType, PARAM_TYPE_INT)
    +                                                  .addProperty(TextVocab.pParamType, TextVocab.typeInt)
                                                       .addLiteral(TextVocab.pParamValue, 4) ,
                                                       model.createResource()
                                                       .addProperty(TextVocab.pParamName, "tokenSeparator")
    -                                                  .addProperty(TextVocab.pParamType, PARAM_TYPE_STRING)
    +                                                  .addProperty(TextVocab.pParamType, TextVocab.typeString)
                                                       .addLiteral(TextVocab.pParamValue, "|") ,
                                                       model.createResource()
                                                       .addProperty(TextVocab.pParamName, "outputUnigrams")
    -                                                  .addProperty(TextVocab.pParamType, PARAM_TYPE_BOOL)
    +                                                  .addProperty(TextVocab.pParamType, TextVocab.typeBoolean)
                                                       .addLiteral(TextVocab.pParamValue, false) ,
                                                       model.createResource()
                                                       .addProperty(TextVocab.pParamName, "outputUnigramsIfNoShingles")
    -                                                  .addProperty(TextVocab.pParamType, PARAM_TYPE_BOOL)
    +                                                  .addProperty(TextVocab.pParamType, TextVocab.typeBoolean)
                                                       .addLiteral(TextVocab.pParamValue, true) ,
                                                       model.createResource()
                                                       .addProperty(TextVocab.pParamName, "fillerToken")
    -                                                  .addProperty(TextVocab.pParamType, PARAM_TYPE_STRING)
    +                                                  .addProperty(TextVocab.pParamType, TextVocab.typeString)
                                                       .addLiteral(TextVocab.pParamValue, "foo")
                                               }))
                          ;
    @@ -185,7 +178,7 @@ public class TestGenericAnalyzerAssembler {
                                               new RDFNode[] { 
                                                       model.createResource()
                                                       .addProperty(TextVocab.pParamName, "stopWords")
    -                                                  .addProperty(TextVocab.pParamType, PARAM_TYPE_FILE)
    +                                                  .addProperty(TextVocab.pParamType, TextVocab.typeFile)
                                                       .addProperty(TextVocab.pParamValue, FILE_STOPS)
                                               }))
                          ;
    
    From 5edb6c8758124fe8dd5a96d7b92949fc3ac1f61f Mon Sep 17 00:00:00 2001
    From: Chris Tomlinson 
    Date: Sun, 23 Apr 2017 10:13:09 -0500
    Subject: [PATCH 10/13] factor DefinedAnalyzerAssembler and
     DefineAnalyzersAssembler into separate classes; move the defined-analyzer cache
     to Util alongside the language-tagged analyzers since both caches have the
     same lifetime and similar uses.
    
    ---
     .../apache/jena/query/text/analyzer/Util.java |  12 ++
     .../assembler/DefineAnalyzersAssembler.java   | 105 ++++++++++++++++++
     .../assembler/DefinedAnalyzerAssembler.java   | 103 +++++------------
     .../assembler/TextIndexLuceneAssembler.java   |  10 +-
     4 files changed, 152 insertions(+), 78 deletions(-)
     create mode 100644 jena-text/src/main/java/org/apache/jena/query/text/assembler/DefineAnalyzersAssembler.java
    
    diff --git a/jena-text/src/main/java/org/apache/jena/query/text/analyzer/Util.java b/jena-text/src/main/java/org/apache/jena/query/text/analyzer/Util.java
    index 20c757388a4..6ad0747d769 100644
    --- a/jena-text/src/main/java/org/apache/jena/query/text/analyzer/Util.java
    +++ b/jena-text/src/main/java/org/apache/jena/query/text/analyzer/Util.java
    @@ -18,6 +18,7 @@
     
     package org.apache.jena.query.text.analyzer;
     
    +import org.apache.jena.rdf.model.Resource;
     import org.apache.lucene.analysis.Analyzer;
     import java.lang.reflect.Constructor;
     import java.util.Hashtable;
    @@ -26,6 +27,9 @@ public class Util {
     
         private static Hashtable> analyzersClasses; //mapping between ISO2-letter language and lucene existing analyzersClasses
         private static Hashtable cache = new Hashtable<>(); //to avoid unnecessary multi instantiation
    +    
    +    // cache of analyzers registered via text:defineAnalyzers, keyed by resource URI
    +    private static Hashtable definedAnalyzers = new Hashtable<>();
     
         static {
             initAnalyzerDefs();
    @@ -55,6 +59,14 @@ public static Analyzer getLocalizedAnalyzer(String lang) {
         public static void addAnalyzer(String lang, Analyzer analyzer) {
             cache.put(lang, analyzer);
         }
    +    
    +    public static Analyzer getDefinedAnalyzer(Resource key) {
    +        return definedAnalyzers.get(key.getURI());
    +    }
    +    
    +    public static void defineAnalyzer(Resource key, Analyzer analyzer) {
    +        definedAnalyzers.put(key.getURI(), analyzer);
    +    }
     
         private static void initAnalyzerDefs() {
             analyzersClasses = new Hashtable<>();
    diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/DefineAnalyzersAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/DefineAnalyzersAssembler.java
    new file mode 100644
    index 00000000000..11270e2bac8
    --- /dev/null
    +++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/DefineAnalyzersAssembler.java
    @@ -0,0 +1,105 @@
    +/**
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements.  See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership.  The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License.  You may obtain a copy of the License at
    + *
    + *     http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +package org.apache.jena.query.text.assembler;
    +
    +import org.apache.jena.assembler.Assembler;
    +import org.apache.jena.query.text.TextIndexException;
    +import org.apache.jena.query.text.analyzer.Util;
    +import org.apache.jena.rdf.model.RDFNode;
    +import org.apache.jena.rdf.model.Resource;
    +import org.apache.jena.rdf.model.Statement;
    +import org.apache.jena.vocabulary.RDF;
    +import org.apache.lucene.analysis.Analyzer;
    +
    +public class DefineAnalyzersAssembler {
    +    /*
    +    <#indexLucene> a text:TextIndexLucene ;
    +        text:directory <file:Lucene> ;
    +        text:entityMap <#entMap> ;
    +        text:defineAnalyzers (
    +            [text:addLang "sa-x-iast" ;
    +             text:analyzer [ . . . ]]
    +            [text:defineAnalyzer <#foo> ;
    +             text:analyzer [ . . . ]]
    +        )
    +    */
    +
    +    public static boolean open(Assembler a, Resource list) {
    +        Resource current = list;
    +        boolean isMultilingualSupport = false;
    +        
    +        while (current != null && ! current.equals(RDF.nil)){
    +            Statement firstStmt = current.getProperty(RDF.first);
    +            if (firstStmt == null) {
    +                throw new TextIndexException("parameter list not well formed: " + current);
    +            }
    +            
    +            RDFNode first = firstStmt.getObject();
    +            if (! first.isResource()) {
    +                throw new TextIndexException("parameter specification must be an anon resource : " + first);
    +            }
    +
    +            // process the current list element to add an analyzer 
    +            Resource adding = (Resource) first;
    +            if (adding.hasProperty(TextVocab.pAnalyzer)) {
    +                Statement analyzerStmt = adding.getProperty(TextVocab.pAnalyzer);
    +                RDFNode analyzerNode = analyzerStmt.getObject();
    +                if (!analyzerNode.isResource()) {
    +                    throw new TextIndexException("addAnalyzers text:analyzer must be an analyzer spec resource: " + analyzerNode);
    +                }
    +                
    +                Analyzer analyzer = (Analyzer) a.open((Resource) analyzerNode);
    +                
    +                if (adding.hasProperty(TextVocab.pAddLang)) {
    +                    Statement langStmt = adding.getProperty(TextVocab.pAddLang);
    +                    String langCode = langStmt.getString();
    +                    Util.addAnalyzer(langCode, analyzer);
    +                    isMultilingualSupport = true;
    +                }
    +                
    +                if (adding.hasProperty(TextVocab.pDefAnalyzer)) {
    +                    Statement defStmt = adding.getProperty(TextVocab.pDefAnalyzer);
    +                    Resource id = defStmt.getResource();
    +                    
    +                    if (id.getURI() != null) {
    +                        Util.defineAnalyzer(id, analyzer);
    +                    } else {
    +                        throw new TextIndexException("addAnalyzers text:defineAnalyzer property must be a non-blank resource: " + adding);
    +                    }
    +                }
    +            } else {
    +                throw new TextIndexException("text:analyzer property is required when adding an analyzer: " + adding);
    +            }
    +            
    +            Statement restStmt = current.getProperty(RDF.rest);
    +            if (restStmt == null) {
    +                throw new TextIndexException("parameter list not terminated by rdf:nil");
    +            }
    +            
    +            RDFNode rest = restStmt.getObject();
    +            if (! rest.isResource()) {
    +                throw new TextIndexException("parameter list node is not a resource : " + rest);
    +            }
    +            
    +            current = (Resource) rest;
    +        }
    +        
    +        return isMultilingualSupport;
    +    }
    +}
    diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/DefinedAnalyzerAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/DefinedAnalyzerAssembler.java
    index e7bd941ff2c..e6909ac62e5 100644
    --- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/DefinedAnalyzerAssembler.java
    +++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/DefinedAnalyzerAssembler.java
    @@ -1,88 +1,39 @@
    -package org.apache.jena.query.text.assembler;
    +/**
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements.  See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership.  The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License.  You may obtain a copy of the License at
    + *
    + *     http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
     
    -import java.util.Hashtable;
    +package org.apache.jena.query.text.assembler;
     
     import org.apache.jena.assembler.Assembler;
     import org.apache.jena.assembler.Mode;
     import org.apache.jena.assembler.assemblers.AssemblerBase;
    -import org.apache.jena.query.text.TextIndexException;
     import org.apache.jena.query.text.analyzer.Util;
    -import org.apache.jena.rdf.model.RDFNode;
     import org.apache.jena.rdf.model.Resource;
     import org.apache.jena.rdf.model.Statement;
    -import org.apache.jena.vocabulary.RDF;
    -import org.apache.lucene.analysis.Analyzer;
     
     public class DefinedAnalyzerAssembler extends AssemblerBase {
    -    
    -    private static Hashtable analyzers = new Hashtable<>();
    -    
    -    public static void addAnalyzer(Resource key, Analyzer analyzer) {
    -        analyzers.put(key, analyzer);
    -    }
    -    
    -    public static boolean addAnalyzers(Assembler a, Resource list) {
    -        Resource current = list;
    -        boolean isMultilingualSupport = false;
    -        
    -        while (current != null && ! current.equals(RDF.nil)){
    -            Statement firstStmt = current.getProperty(RDF.first);
    -            if (firstStmt == null) {
    -                throw new TextIndexException("parameter list not well formed: " + current);
    -            }
    -            
    -            RDFNode first = firstStmt.getObject();
    -            if (! first.isResource()) {
    -                throw new TextIndexException("parameter specification must be an anon resource : " + first);
    -            }
    -
    -            // process the current list element to add an analyzer 
    -            Resource adding = (Resource) first;
    -            if (adding.hasProperty(TextVocab.pAnalyzer)) {
    -                Statement analyzerStmt = adding.getProperty(TextVocab.pAnalyzer);
    -                RDFNode analyzerNode = analyzerStmt.getObject();
    -                if (!analyzerNode.isResource()) {
    -                    throw new TextIndexException("addAnalyzers text:analyzer must be an analyzer spec resource: " + analyzerNode);
    -                }
    -                
    -                Analyzer analyzer = (Analyzer) a.open((Resource) analyzerNode);
    -                
    -                if (adding.hasProperty(TextVocab.pAddLang)) {
    -                    Statement langStmt = adding.getProperty(TextVocab.pAddLang);
    -                    String langCode = langStmt.getString();
    -                    Util.addAnalyzer(langCode, analyzer);
    -                    isMultilingualSupport = true;
    -                }
    -                
    -                if (adding.hasProperty(TextVocab.pDefAnalyzer)) {
    -                    Statement defStmt = adding.getProperty(TextVocab.pDefAnalyzer);
    -                    Resource id = defStmt.getResource();
    -                    
    -                    if (id.getURI() != null) {
    -                        DefinedAnalyzerAssembler.addAnalyzer(id, analyzer);
    -                    } else {
    -                        throw new TextIndexException("addAnalyzers text:defineAnalyzer property must be a non-blank resource: " + adding);
    -                    }
    -                }
    -            } else {
    -                throw new TextIndexException("text:analyzer property is required when adding an analyzer: " + adding);
    -            }
    -            
    -            Statement restStmt = current.getProperty(RDF.rest);
    -            if (restStmt == null) {
    -                throw new TextIndexException("parameter list not terminated by rdf:nil");
    -            }
    -            
    -            RDFNode rest = restStmt.getObject();
    -            if (! rest.isResource()) {
    -                throw new TextIndexException("parameter list node is not a resource : " + rest);
    -            }
    -            
    -            current = (Resource) rest;
    -        }
    -        
    -        return isMultilingualSupport;
    -    }
    +    /*
    +    text:map (
    +         [ text:field "text" ; 
    +           text:predicate rdfs:label;
    +           text:analyzer [
    +               a text:DefinedAnalyzer ;
    +               text:useAnalyzer <#Foo> ]
    +     */
        
         @Override
         public Object open(Assembler a, Resource root, Mode mode) {
    @@ -91,7 +42,7 @@ public Object open(Assembler a, Resource root, Mode mode) {
                 Statement useStmt = root.getProperty(TextVocab.pUseAnalyzer);
                 Resource key = useStmt.getResource();
                 
    -            return analyzers.get(key);
    +            return Util.getDefinedAnalyzer(key);
             }
             
             return null;
    diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java
    index 7acfb9eb47c..14af9bf9d9b 100644
    --- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java
    +++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java
    @@ -26,6 +26,7 @@
     import org.apache.jena.assembler.assemblers.AssemblerBase ;
     import org.apache.jena.atlas.io.IO ;
     import org.apache.jena.atlas.lib.IRILib ;
    +import org.apache.jena.atlas.logging.Log;
     import org.apache.jena.query.text.*;
     import org.apache.jena.rdf.model.RDFNode ;
     import org.apache.jena.rdf.model.Resource ;
    @@ -120,10 +121,15 @@ public TextIndex open(Assembler a, Resource root, Mode mode) {
                 if (null != defAnalyzersStatement) {
                     RDFNode aNode = defAnalyzersStatement.getObject();
                     if (! aNode.isResource()) {
    -                    throw new TextIndexException("text:defineAnalyzers property is not a resource : " + aNode);
    +                    throw new TextIndexException("text:defineAnalyzers property is not a resource (list) : " + aNode);
                     }
    -                boolean addedLangs = DefinedAnalyzerAssembler.addAnalyzers(a, (Resource) aNode);
    +                boolean addedLangs = DefineAnalyzersAssembler.open(a, (Resource) aNode);
    +                // if text:defineAnalyzers registered an analyzer for any language tag
    +                // (text:addLang), make sure multilingual support is enabled
                     if (addedLangs) {
    +                    if (!isMultilingualSupport) {
    +                        Log.warn(this,  "Multilingual support implicitly enabled by text:defineAnalyzers");
    +                    }
                         isMultilingualSupport = true;
                     }
                 }
    
    From fef4d22faeda09159cc2523e477571d1d23a85e7 Mon Sep 17 00:00:00 2001
    From: Chris Tomlinson 
    Date: Sun, 23 Apr 2017 11:20:53 -0500
    Subject: [PATCH 11/13] ignore extras
    
    ---
     jena-text/.gitignore | 1 +
     1 file changed, 1 insertion(+)
    
    diff --git a/jena-text/.gitignore b/jena-text/.gitignore
    index e69de29bb2d..f7b49ee3ae3 100644
    --- a/jena-text/.gitignore
    +++ b/jena-text/.gitignore
    @@ -0,0 +1 @@
    +/text-query.mdtext
    
    From 76c3ae3e9f2f35303914258d92eba47eae8e8a75 Mon Sep 17 00:00:00 2001
    From: Chris Tomlinson 
    Date: Wed, 28 Jun 2017 08:22:52 -0500
    Subject: [PATCH 12/13] delete text-query.mdtext; update .gitignore; add
     license and JenaSystem.init()
    
    ---
     jena-text/.gitignore                          |  2 +-
     .../TestGenericAnalyzerAssembler.java         | 20 +++++++++++++++++++
     2 files changed, 21 insertions(+), 1 deletion(-)
    
    diff --git a/jena-text/.gitignore b/jena-text/.gitignore
    index f7b49ee3ae3..6c7b69a0156 100644
    --- a/jena-text/.gitignore
    +++ b/jena-text/.gitignore
    @@ -1 +1 @@
    -/text-query.mdtext
    +.gitignore
    diff --git a/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestGenericAnalyzerAssembler.java b/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestGenericAnalyzerAssembler.java
    index 3effc39bb01..bcd5a122185 100644
    --- a/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestGenericAnalyzerAssembler.java
    +++ b/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestGenericAnalyzerAssembler.java
    @@ -1,3 +1,21 @@
    +/**
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements.  See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership.  The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License.  You may obtain a copy of the License at
    + *
    + *     http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
     package org.apache.jena.query.text.assembler;
     
     import static org.junit.Assert.assertEquals;
    @@ -6,6 +24,7 @@
     import org.apache.jena.rdf.model.ModelFactory;
     import org.apache.jena.rdf.model.RDFNode;
     import org.apache.jena.rdf.model.Resource;
    +import org.apache.jena.system.JenaSystem;
     import org.apache.jena.vocabulary.RDF;
     import org.apache.lucene.analysis.Analyzer;
     import org.apache.lucene.analysis.core.SimpleAnalyzer;
    @@ -68,6 +87,7 @@ public class TestGenericAnalyzerAssembler {
         private static final String FILE_STOPS = "testing/some-stop-words.txt";
         
         static {
    +        JenaSystem.init();
             TextAssembler.init();
             Model model = ModelFactory.createDefaultModel();
             
    
    From ece2f41d8a381ee961dc61e8fafd66f5925bb0fa Mon Sep 17 00:00:00 2001
    From: Chris Tomlinson 
    Date: Wed, 28 Jun 2017 09:36:29 -0500
    Subject: [PATCH 13/13] added JenaSystem.init() to other jena-text tests
    
    ---
     .../jena/query/text/assembler/TestEntityMapAssembler.java       | 2 ++
     .../jena/query/text/assembler/TestTextDatasetAssembler.java     | 2 ++
     .../jena/query/text/assembler/TestTextIndexLuceneAssembler.java | 2 ++
     3 files changed, 6 insertions(+)
    
    diff --git a/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestEntityMapAssembler.java b/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestEntityMapAssembler.java
    index cdf5dcc41e5..99a3e5645c4 100644
    --- a/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestEntityMapAssembler.java
    +++ b/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestEntityMapAssembler.java
    @@ -32,6 +32,7 @@
     import org.apache.jena.query.text.analyzer.ConfigurableAnalyzer ;
     import org.apache.jena.query.text.analyzer.LowerCaseKeywordAnalyzer ;
     import org.apache.jena.rdf.model.* ;
    +import org.apache.jena.system.JenaSystem;
     import org.apache.jena.vocabulary.RDF ;
     import org.apache.jena.vocabulary.RDFS ;
     import org.apache.lucene.analysis.core.KeywordAnalyzer ;
    @@ -163,6 +164,7 @@ private static Object getOne(EntityDefinition entityDef, String field) {
         private static final Property SPEC2_PREDICATE1 = RDFS.label;
         private static final Property SPEC2_PREDICATE2 = RDFS.comment;
         static {
    +        JenaSystem.init();
             TextAssembler.init();
             Model model = ModelFactory.createDefaultModel();
             
    diff --git a/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestTextDatasetAssembler.java b/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestTextDatasetAssembler.java
    index dbef4df3a19..d6cb2ce2f2e 100644
    --- a/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestTextDatasetAssembler.java
    +++ b/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestTextDatasetAssembler.java
    @@ -33,6 +33,7 @@
     import org.apache.jena.sparql.core.DatasetGraph ;
     import org.apache.jena.sparql.core.Quad ;
     import org.apache.jena.sparql.core.QuadAction ;
    +import org.apache.jena.system.JenaSystem;
     import org.apache.jena.tdb.assembler.AssemblerTDB ;
     import org.apache.jena.vocabulary.RDF ;
     import org.junit.Test ;
    @@ -93,6 +94,7 @@ public void testCustomTextDocProducerDyadicConstructor() {
         }
     
         static {
    +        JenaSystem.init();
             TextAssembler.init();
             AssemblerTDB.init();
             spec1 =
    diff --git a/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestTextIndexLuceneAssembler.java b/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestTextIndexLuceneAssembler.java
    index 67426615613..53d2eafb01e 100644
    --- a/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestTextIndexLuceneAssembler.java
    +++ b/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestTextIndexLuceneAssembler.java
    @@ -21,6 +21,7 @@
     import org.apache.jena.assembler.Assembler ;
     import org.apache.jena.query.text.TextIndexLucene ;
     import org.apache.jena.rdf.model.Resource ;
    +import org.apache.jena.system.JenaSystem;
     import org.apache.jena.vocabulary.RDFS ;
     import org.apache.lucene.analysis.core.KeywordAnalyzer ;
     import org.apache.lucene.store.RAMDirectory ;
    @@ -99,6 +100,7 @@ public class TestTextIndexLuceneAssembler extends AbstractTestTextAssembler {
         }
     
         static {
    +        JenaSystem.init();
             TextAssembler.init();
         }