From 1440e81d75ee01baf874c407d5f0017bc59c6787 Mon Sep 17 00:00:00 2001
From: Chris Tomlinson <ct@moonvine.org>
Date: Mon, 17 Apr 2017 14:53:41 -0500
Subject: [PATCH 01/13] initial commit for generic analyzers

---
 .../assembler/GenericAnalyzerAssembler.java   | 20 +++++++++++++++++++
 .../query/text/assembler/TextAssembler.java   |  1 +
 .../jena/query/text/assembler/TextVocab.java  |  1 +
 3 files changed, 22 insertions(+)
 create mode 100644 jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java

diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java
new file mode 100644
index 00000000000..5c25cb2f234
--- /dev/null
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java
@@ -0,0 +1,20 @@
+package org.apache.jena.query.text.assembler;
+
+import org.apache.jena.assembler.Assembler;
+import org.apache.jena.assembler.Mode;
+import org.apache.jena.assembler.assemblers.AssemblerBase;
+import org.apache.jena.rdf.model.Resource;
+
+public class GenericAnalyzerAssembler extends AssemblerBase {
+
+	public GenericAnalyzerAssembler() {
+		// TODO Auto-generated constructor stub
+	}
+
+	@Override
+	public Object open(Assembler a, Resource root, Mode mode) {
+		// TODO Auto-generated method stub
+		return null;
+	}
+
+}
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextAssembler.java
index 80b2f7e3259..636c6bc3929 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextAssembler.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextAssembler.java
@@ -37,6 +37,7 @@ public static void init()
         Assembler.general.implementWith(TextVocab.lowerCaseKeywordAnalyzer, new LowerCaseKeywordAnalyzerAssembler()) ;
         Assembler.general.implementWith(TextVocab.localizedAnalyzer, new LocalizedAnalyzerAssembler()) ;
         Assembler.general.implementWith(TextVocab.configurableAnalyzer, new ConfigurableAnalyzerAssembler()) ;
+        Assembler.general.implementWith(TextVocab.genericAnalyzer, new GenericAnalyzerAssembler()) ;
 
     }
 }
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
index 719d40469aa..bc49d1048b3 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
@@ -66,6 +66,7 @@ public class TextVocab
     public static final Resource lowerCaseKeywordAnalyzer    = Vocab.resource(NS, "LowerCaseKeywordAnalyzer");
     public static final Resource localizedAnalyzer    = Vocab.resource(NS, "LocalizedAnalyzer");
     public static final Resource configurableAnalyzer = Vocab.resource(NS, "ConfigurableAnalyzer");
+    public static final Resource genericAnalyzer   = Vocab.resource(NS, "GenericAnalyzer");
     
     // Tokenizers
     public static final Resource standardTokenizer  = Vocab.resource(NS, "StandardTokenizer");

From 8b3757bae52d08d4b308bd0f996ff452c60cc7c9 Mon Sep 17 00:00:00 2001
From: Chris Tomlinson <ct@moonvine.org>
Date: Wed, 19 Apr 2017 14:43:04 -0500
Subject: [PATCH 02/13] initial documentation

---
 .../assembler/GenericAnalyzerAssembler.java   | 85 +++++++++++++++++++
 1 file changed, 85 insertions(+)

diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java
index 5c25cb2f234..db707d2b242 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java
@@ -1,3 +1,21 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
 package org.apache.jena.query.text.assembler;
 
 import org.apache.jena.assembler.Assembler;
@@ -5,7 +23,74 @@
 import org.apache.jena.assembler.assemblers.AssemblerBase;
 import org.apache.jena.rdf.model.Resource;
 
+/**
+ * Creates generic analyzers given a fully qualified Class name and a list
+ * of parameters for a constructor of the Class.
+ * <p>
+ * The parameters may be of the following types:
+ * <pre>
+ *     string    String
+ *     set       org.apache.lucene.analysis.util.CharArraySet
+ *     file      java.io.FileReader
+ *     int       int
+ *     boolean   boolean
+ * </pre>
+ * 
+ * Although the list of types is not exhaustive it is a simple matter
+ * to create a wrapper Analyzer that reads a file with information that can
+ * be used to initialize any sort of parameters that may be needed for
+ * a given Analyzer. The provided types cover the vast majority of cases.
+ * <p>
+ * For example, <code>org.apache.lucene.analysis.ja.JapaneseAnalyzer</code>
+ * has a constructor with 4 parameters: a <code>UserDict</code>,
+ * a <code>CharArraySet</code>, a <code>JapaneseTokenizer.Mode</code>, and a 
+ * <code>Set&lt;String></code>. So a simple wrapper can extract the values
+ * needed for the various parameters with types not available in this
+ * extension, construct the required instances, and instantiate the
+ * <code>JapaneseAnalyzer</code>.
+ * <p>
+ * Adding custom Analyzers such as the above wrapper analyzer is a simple
+ * matter of adding the Analyzer class and any associated filters and tokenizer
+ * and so on to the classpath for Jena - usually in a jar. Of course, all of 
+ * the Analyzers that are included in the Lucene distribution bundled with Jena
+ * are available as generic Analyzers as well.
+ * <p>
+ * Each parameter object is specified with:
+ * <ul>
+ * <li>an optional <code>text:paramName</code> that may be used to document which 
+ * parameter is represented</li>
+ * <li>a <code>text:paramType</code> which is one of: <code>string</code>, 
+ * <code>set</code>, <code>file</code>, <code>int</code>, <code>boolean</code>.</li>
+ * <li>a text:paramValue which is an xsd:string, xsd:boolean or xsd:int.</li>
+ * </ul>
+ * <p>
+ * A parameter of type <code>set</code> <i>may have</i> zero or more <code>text:paramValue</code>s.
+ * <p>
+ * A parameter of type <code>string</code>, <code>file</code>, <code>boolean</code>, or 
+ * <code>int</code> <i>must have</i> a single <code>text:paramValue</code>
+ */
 public class GenericAnalyzerAssembler extends AssemblerBase {
+    /*
+    text:map (
+         [ text:field "text" ; 
+           text:predicate rdfs:label;
+           text:analyzer [
+               a text:GenericAnalyzer ;
+               text:class "org.apache.lucene.analysis.en.EnglishAnalyzer" ;
+               text:params [
+                    a rdf:seq ;
+                    rdf:_1 [
+                        text:paramName "stopwords" ;
+                        text:paramType "set" ;
+                        text:paramValue "the", "a", "an" ] ;
+                    rdf:_2 [
+                        text:paramName "stemExclusionSet" ;
+                        text:paramType "set" ;
+                        text:paramValue "ing", "ed" ]
+                    ]
+                ]
+          ] .
+     */
 
 	public GenericAnalyzerAssembler() {
 		// TODO Auto-generated constructor stub

From 27ea30b73855d7a3cf0cd9561d2089295ec03353 Mon Sep 17 00:00:00 2001
From: Chris Tomlinson <ct@moonvine.org>
Date: Thu, 20 Apr 2017 15:37:00 -0500
Subject: [PATCH 03/13] implement GenericAnalyzerAssembler. TO DO: Tests

---
 .../assembler/GenericAnalyzerAssembler.java   | 332 ++++++++++++++++--
 .../query/text/assembler/TextAssembler.java   |   2 +-
 .../jena/query/text/assembler/TextVocab.java  |   8 +-
 3 files changed, 318 insertions(+), 24 deletions(-)

diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java
index db707d2b242..7fb04cce4dc 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java
@@ -18,10 +18,24 @@
 
 package org.apache.jena.query.text.assembler;
 
+import java.io.Reader;
+import java.lang.reflect.Constructor;
+import java.lang.reflect.InvocationTargetException;
+import java.util.ArrayList;
+import java.util.List;
+
 import org.apache.jena.assembler.Assembler;
 import org.apache.jena.assembler.Mode;
 import org.apache.jena.assembler.assemblers.AssemblerBase;
+import org.apache.jena.atlas.logging.Log ;
+import org.apache.jena.query.text.TextIndexException;
+import org.apache.jena.rdf.model.Literal;
+import org.apache.jena.rdf.model.RDFNode;
 import org.apache.jena.rdf.model.Resource;
+import org.apache.jena.rdf.model.Statement;
+import org.apache.jena.vocabulary.RDF;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.CharArraySet;
 
 /**
  * Creates generic analyzers given a fully qualified Class name and a list
@@ -64,10 +78,29 @@
  * <li>a text:paramValue which is an xsd:string, xsd:boolean or xsd:int.</li>
  * </ul>
  * <p>
- * A parameter of type <code>set</code> <i>may have</i> zero or more <code>text:paramValue</code>s.
+ * A parameter of type <code>set</code> <i>must have</i> a list of zero or more <code>String</code>s.
  * <p>
  * A parameter of type <code>string</code>, <code>file</code>, <code>boolean</code>, or 
- * <code>int</code> <i>must have</i> a single <code>text:paramValue</code>
+ * <code>int</code> <i>must have</i> a single <code>text:paramValue</code> of the appropriate type.
+ * <p>
+ * Example:
+ * <pre>
+    text:map (
+         [ text:field "text" ; 
+           text:predicate rdfs:label;
+           text:analyzer [
+               a text:GenericAnalyzer ;
+               text:class "org.apache.lucene.analysis.en.EnglishAnalyzer" ;
+               text:params (
+                    [ text:paramName "stopwords" ;
+                      text:paramType "set" ;
+                      text:paramValue ("the" "a" "an") ]
+                    [ text:paramName "stemExclusionSet" ;
+                      text:paramType "set" ;
+                      text:paramValue ("ing" "ed") ]
+                    )
+           ] .
+ * </pre>
  */
 public class GenericAnalyzerAssembler extends AssemblerBase {
     /*
@@ -77,29 +110,284 @@ public class GenericAnalyzerAssembler extends AssemblerBase {
            text:analyzer [
                a text:GenericAnalyzer ;
                text:class "org.apache.lucene.analysis.en.EnglishAnalyzer" ;
-               text:params [
-                    a rdf:seq ;
-                    rdf:_1 [
-                        text:paramName "stopwords" ;
-                        text:paramType "set" ;
-                        text:paramValue "the", "a", "an" ] ;
-                    rdf:_2 [
-                        text:paramName "stemExclusionSet" ;
-                        text:paramType "set" ;
-                        text:paramValue "ing", "ed" ]
-                    ]
-                ]
-          ] .
+               text:params (
+                    [ text:paramName "stopwords" ;
+                      text:paramType "set" ;
+                      text:paramValue ("the" "a" "an") ]
+                    [ text:paramName "stemExclusionSet" ;
+                      text:paramType "set" ;
+                      text:paramValue ("ing" "ed") ]
+                    )
+           ] .
      */
 
-	public GenericAnalyzerAssembler() {
-		// TODO Auto-generated constructor stub
-	}
-
 	@Override
-	public Object open(Assembler a, Resource root, Mode mode) {
-		// TODO Auto-generated method stub
-		return null;
+	public Analyzer open(Assembler a, Resource root, Mode mode) {
+	    if (root.hasProperty(TextVocab.pClass)) {
+	        // text:class is expected to be a string literal
+	        String className = root.getProperty(TextVocab.pClass).getString();
+
+	        // is the class accessible?
+	        Class<?> clazz = null;
+	        try {
+	            clazz = Class.forName(className);
+	        } catch (ClassNotFoundException e) {
+	            Log.error(this, "Analyzer class " + className + " not found. " + e.getMessage(), e);
+	            return null;
+	        }
+
+	        // Is the class an Analyzer?
+	        if (!Analyzer.class.isAssignableFrom(clazz)) {
+	            Log.error(this, clazz.getName() + " has to be a subclass of " + Analyzer.class.getName());
+	            return null;
+	        }
+	        
+	        if (root.hasProperty(TextVocab.pParams)) {
+	            RDFNode node = root.getProperty(TextVocab.pParams).getObject();
+	            if (! node.isResource()) {
+	                throw new TextIndexException("text:params must be a list of parameter resources: " + node);
+	            }
+
+	            List<ParamSpec> specs = getParamSpecs((Resource) node);
+
+	            // split the param specs into classes and values for constructor lookup
+	            final Class<?> paramClasses[] = new Class<?>[specs.size()];
+	            final Object paramValues[] = new Object[specs.size()];
+	            for (int i = 0; i < specs.size(); i++) {
+	                ParamSpec spec = specs.get(i);
+	                paramClasses[i] = spec.getValueClass();
+	                paramValues[i] = spec.getValue();
+	            }
+
+	            // Create new analyzer
+	            return newAnalyzer(clazz, paramClasses, paramValues);
+
+	        } else {
+	            // use the nullary Analyzer constructor
+	            return newAnalyzer(clazz, new Class<?>[0], new Object[0]);
+	        }
+	    } else {
+	        throw new TextIndexException("text:class property is required by GenericAnalyzer");
+	    }
 	}
 
+    /**
+     * Create instance of the Lucene Analyzer, <code>class</code>, with provided parameters
+     *
+     * @param clazz The analyzer class
+     * @param paramClasses The parameter classes
+     * @param paramValues The parameter values
+     * @return The lucene analyzer
+     */
+    private Analyzer newAnalyzer(Class<?> clazz, Class<?>[] paramClasses, Object[] paramValues) {
+
+        String className = clazz.getName();
+
+        try {
+            final Constructor<?> cstr = clazz.getDeclaredConstructor(paramClasses);
+
+            return (Analyzer) cstr.newInstance(paramValues);
+
+        } catch (IllegalArgumentException | IllegalAccessException | InstantiationException | InvocationTargetException | SecurityException e) {
+            Log.error(this, "Exception while instantiating analyzer class " + className + ". " + e.getMessage(), e);
+        } catch (NoSuchMethodException ex) {
+            Log.error(this, "Could not find matching analyzer class constructor for " + className + " " + ex.getMessage(), ex);
+        }
+
+        return null;
+    }
+    
+    private List<ParamSpec> getParamSpecs(Resource list) {
+        List<ParamSpec> result = new ArrayList<>();
+        Resource current = list;
+        
+        while (current != null && ! current.equals(RDF.nil)){
+            Statement firstStmt = current.getProperty(RDF.first);
+            if (firstStmt == null) {
+                throw new TextIndexException("parameter list not well formed: " + current);
+            }
+            
+            RDFNode first = firstStmt.getObject();
+            if (! first.isResource()) {
+                throw new TextIndexException("parameter specification must be an anon resource : " + first);
+            }
+
+            result.add(getParamSpec((Resource) first));
+            
+            Statement restStmt = current.getProperty(RDF.rest);
+            if (restStmt == null) {
+                throw new TextIndexException("parameter list not terminated by rdf:nil");
+            }
+            
+            RDFNode rest = restStmt.getObject();
+            if (! rest.isResource()) {
+                throw new TextIndexException("parameter list node is not a resource : " + rest);
+            }
+            
+            current = (Resource) rest;
+        }
+        
+        return result;
+    }
+    
+    private ParamSpec getParamSpec(Resource node) {
+        Statement nameStmt = node.getProperty(TextVocab.pParamName);
+        Statement typeStmt = node.getProperty(TextVocab.pParamType);
+        Statement valueStmt = node.getProperty(TextVocab.pParamValue);
+        
+        String name = getStringValue(nameStmt);
+        String type = getStringValue(typeStmt);
+        String value = getStringValue(valueStmt);
+
+        switch (type) {
+
+        // String
+        case "string": {
+            if (value == null) {
+                throw new TextIndexException("Value for string param: " + name + " must not be empty!");
+            }
+
+            return new ParamSpec(name, value, String.class);
+        }
+        
+        // "java.io.FileReader":
+        case "file": {
+
+            if (value == null) {
+                throw new TextIndexException("Value for file param must exist and must contain a file name.");
+            }
+
+            try {
+                // The analyzer is responsible for closing the file
+                Reader fileReader = new java.io.FileReader(value);
+                return new ParamSpec(name, fileReader, Reader.class);
+
+            } catch (java.io.FileNotFoundException ex) {
+                throw new TextIndexException("File " + value + " for param " + name + " not found!");
+            }
+        }
+        
+        // "org.apache.lucene.analysis.util.CharArraySet":
+        case "set": {
+            if (valueStmt == null) {
+                throw new TextIndexException("A set param spec must have a text:paramValue:" + node);
+            }
+            
+            RDFNode valueNode = valueStmt.getObject();
+            if (!valueNode.isResource()) {
+                throw new TextIndexException("A set param spec text:paramValue must be a list of strings: " + valueNode);
+            }
+            
+            List<String> values = toStrings((Resource) valueNode);
+
+            return new ParamSpec(name, new CharArraySet(values, false), CharArraySet.class);
+        }
+        
+        // "int":
+        case "int":
+            if (value == null) {
+                throw new TextIndexException("Value for int param: " + name + " must not be empty!");
+            }
+
+            int n = ((Literal) valueStmt.getObject()).getInt();
+            return new ParamSpec(name, n, int.class);
+
+        // "boolean":
+        case "boolean":
+            if (value == null) {
+                throw new TextIndexException("Value for boolean param: " + name + " must not be empty!");
+            }
+
+            boolean b = ((Literal) valueStmt.getObject()).getBoolean();
+            return new ParamSpec(name, b, boolean.class);
+        
+        default:
+            // there was no match
+            Log.error(this, "Unknown parameter type: " + type + " for param: " + name + " with value: " + value);
+            break;
+        }
+
+        return null;
+    }
+    
+    private String getStringValue(Statement stmt) {
+        if (stmt == null) {
+            return null;
+        } else {
+            RDFNode node = stmt.getObject();
+            if (node.isLiteral()) {
+                return ((Literal) node).getLexicalForm();
+            } else {
+                return null;
+            }
+        }
+    }
+
+    private List<String> toStrings(Resource list) {
+        List<String> result = new ArrayList<>();
+        Resource current = list;
+        
+        while (current != null && ! current.equals(RDF.nil)){
+            Statement firstStmt = current.getProperty(RDF.first);
+            if (firstStmt == null) {
+                throw new TextIndexException("param spec of type set not well formed");
+            }
+            
+            RDFNode first = firstStmt.getObject();
+            if (! first.isLiteral()) {
+                throw new TextIndexException("param spec of type set item is not a literal: " + first);
+            }
+            
+            result.add(((Literal)first).getLexicalForm());
+            
+            Statement restStmt = current.getProperty(RDF.rest);
+            if (restStmt == null) {
+                throw new TextIndexException("param spec of type set not terminated by rdf:nil");
+            }
+            
+            RDFNode rest = restStmt.getObject();
+            if (! rest.isResource()) {
+                throw new TextIndexException("param spec of type set rest is not a resource: " + rest);
+            }
+            
+            current = (Resource) rest;
+        }
+        
+        return result;
+    }
+
+    /**
+     * <code>ParamSpec</code> contains the <code>name</code>, <code>Class</code>, and 
+     * <code>value</code> of a parameter for a constructor (or really any method in general)
+     */
+    private static final class ParamSpec {
+
+        private final String name;
+        private final Object value;
+        private final Class<?> clazz;
+
+        @SuppressWarnings("unused")
+        public ParamSpec(String key, Object value) {
+            this(key, value, value.getClass());
+        }
+
+        public ParamSpec(String key, Object value, Class<?> clazz) {
+            this.name = key;
+            this.value = value;
+            this.clazz = clazz;
+        }
+
+        @SuppressWarnings("unused")
+        public String getKey() {
+            return name;
+        }
+
+        public Object getValue() {
+            return value;
+        }
+
+        public Class<?> getValueClass() {
+            return clazz;
+        }
+    }
 }
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextAssembler.java
index 636c6bc3929..45f5cee7cec 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextAssembler.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextAssembler.java
@@ -37,7 +37,7 @@ public static void init()
         Assembler.general.implementWith(TextVocab.lowerCaseKeywordAnalyzer, new LowerCaseKeywordAnalyzerAssembler()) ;
         Assembler.general.implementWith(TextVocab.localizedAnalyzer, new LocalizedAnalyzerAssembler()) ;
         Assembler.general.implementWith(TextVocab.configurableAnalyzer, new ConfigurableAnalyzerAssembler()) ;
-        Assembler.general.implementWith(TextVocab.genericAnalyzer, new GenericAnalyzerAssembler()) ;
+        Assembler.general.implementWith(TextVocab.genericAnalyzer,  new GenericAnalyzerAssembler()) ;
 
     }
 }
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
index bc49d1048b3..cd1844d2cd3 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
@@ -66,7 +66,6 @@ public class TextVocab
     public static final Resource lowerCaseKeywordAnalyzer    = Vocab.resource(NS, "LowerCaseKeywordAnalyzer");
     public static final Resource localizedAnalyzer    = Vocab.resource(NS, "LocalizedAnalyzer");
     public static final Resource configurableAnalyzer = Vocab.resource(NS, "ConfigurableAnalyzer");
-    public static final Resource genericAnalyzer   = Vocab.resource(NS, "GenericAnalyzer");
     
     // Tokenizers
     public static final Resource standardTokenizer  = Vocab.resource(NS, "StandardTokenizer");
@@ -87,5 +86,12 @@ public class TextVocab
     public static final Property pReplicas          = Vocab.property(NS, "replicas");
     public static final Property pIndexName          = Vocab.property(NS, "indexName");
 
+    //GenericAnalyzer
+    public static final Resource genericAnalyzer    = Vocab.resource(NS, "GenericAnalyzer");
+    public static final Property pClass             = Vocab.property(NS, "class");
+    public static final Property pParams            = Vocab.property(NS, "params");
+    public static final Property pParamName         = Vocab.property(NS, "paramName");
+    public static final Property pParamType         = Vocab.property(NS, "paramType");
+    public static final Property pParamValue        = Vocab.property(NS, "paramValue");
 }
 

From 8f1fa7ccbf2cb05f2eed121831c39e07260ec18b Mon Sep 17 00:00:00 2001
From: Chris Tomlinson <ct@moonvine.org>
Date: Fri, 21 Apr 2017 16:02:20 -0500
Subject: [PATCH 04/13] adding GenericAnalyzer tests

---
 .../assembler/GenericAnalyzerAssembler.java   |   2 +-
 .../org/apache/jena/query/text/TS_Text.java   |   2 +
 .../TestGenericAnalyzerAssembler.java         | 116 ++++++++++++++++++
 3 files changed, 119 insertions(+), 1 deletion(-)
 create mode 100644 jena-text/src/test/java/org/apache/jena/query/text/assembler/TestGenericAnalyzerAssembler.java

diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java
index 7fb04cce4dc..0fa706efcb7 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java
@@ -167,7 +167,7 @@ public Analyzer open(Assembler a, Resource root, Mode mode) {
 	            return newAnalyzer(clazz, new Class<?>[0], new Object[0]);
 	        }
 	    } else {
-	        throw new TextIndexException("text:class property is required by GenericAnalyzer");
+	        throw new TextIndexException("text:class property is required by GenericAnalyzer: " + root);
 	    }
 	}
 
diff --git a/jena-text/src/test/java/org/apache/jena/query/text/TS_Text.java b/jena-text/src/test/java/org/apache/jena/query/text/TS_Text.java
index 7259b111f84..91663f22f86 100644
--- a/jena-text/src/test/java/org/apache/jena/query/text/TS_Text.java
+++ b/jena-text/src/test/java/org/apache/jena/query/text/TS_Text.java
@@ -21,6 +21,7 @@
 import org.apache.jena.query.text.assembler.TestEntityMapAssembler;
 import org.apache.jena.query.text.assembler.TestTextDatasetAssembler;
 import org.apache.jena.query.text.assembler.TestTextIndexLuceneAssembler;
+import org.apache.jena.query.text.assembler.TestGenericAnalyzerAssembler;
 import org.junit.runner.RunWith;
 import org.junit.runners.Suite;
 import org.junit.runners.Suite.SuiteClasses;
@@ -47,6 +48,7 @@
     , TestDatasetWithConfigurableAnalyzer.class
     , TestDatasetWithAnalyzingQueryParser.class
     , TestDatasetWithComplexPhraseQueryParser.class
+    , TestGenericAnalyzerAssembler.class
 })
 
 public class TS_Text
diff --git a/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestGenericAnalyzerAssembler.java b/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestGenericAnalyzerAssembler.java
new file mode 100644
index 00000000000..e2a48938b2f
--- /dev/null
+++ b/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestGenericAnalyzerAssembler.java
@@ -0,0 +1,116 @@
+package org.apache.jena.query.text.assembler;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.util.List;
+
+import org.apache.jena.assembler.Assembler;
+import org.apache.jena.atlas.logging.Log;
+import org.apache.jena.atlas.logging.LogCtl;
+import org.apache.jena.query.text.EntityDefinition;
+import org.apache.jena.query.text.TextIndexLucene;
+import org.apache.jena.rdf.model.Model;
+import org.apache.jena.rdf.model.ModelFactory;
+import org.apache.jena.rdf.model.Property;
+import org.apache.jena.rdf.model.RDFNode;
+import org.apache.jena.rdf.model.Resource;
+import org.apache.jena.vocabulary.RDF;
+import org.apache.jena.vocabulary.RDFS;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.core.KeywordAnalyzer;
+import org.apache.lucene.analysis.core.SimpleAnalyzer;
+import org.apache.lucene.analysis.fr.FrenchAnalyzer;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+public class TestGenericAnalyzerAssembler {
+    
+//    // Suppress warnings
+//    @BeforeClass public static void beforeClass() { LogCtl.setError(EntityDefinitionAssembler.class); }
+//    @AfterClass  public static void afterClass()  { LogCtl.setInfo(EntityDefinitionAssembler.class); }
+
+    private static final String TESTBASE = "http://example.org/test/";
+    private static final Resource spec1;
+    private static final Resource spec2;
+    private static final Resource spec3;
+    
+    @Test public void AnalyzerNullaryCtor() {
+        GenericAnalyzerAssembler gaAssem = new GenericAnalyzerAssembler();
+        Analyzer analyzer = gaAssem.open(null, spec1, null);
+        assertEquals(SimpleAnalyzer.class, analyzer.getClass());
+    }
+    
+    @Test public void AnalyzerNullaryCtor2() {
+        GenericAnalyzerAssembler gaAssem = new GenericAnalyzerAssembler();
+        Analyzer analyzer = gaAssem.open(null, spec2, null);
+        assertEquals(FrenchAnalyzer.class, analyzer.getClass());
+    }
+    
+    @Test public void AnalyzerCtorSet1() {
+        GenericAnalyzerAssembler gaAssem = new GenericAnalyzerAssembler();
+        Analyzer analyzer = gaAssem.open(null, spec3, null);
+        assertEquals(FrenchAnalyzer.class, analyzer.getClass());
+    }
+    
+    
+    private static final String CLASS_SIMPLE = "org.apache.lucene.analysis.core.SimpleAnalyzer";
+    private static final String CLASS_FRENCH = "org.apache.lucene.analysis.fr.FrenchAnalyzer";
+    
+    private static final String PARAM_TYPE_BOOL = "boolean";
+    private static final String PARAM_TYPE_FILE = "file";
+    private static final String PARAM_TYPE_INT = "int";
+    private static final String PARAM_TYPE_SET = "set";
+    private static final String PARAM_TYPE_STRING = "string";
+    
+    static {
+        TextAssembler.init();
+        Model model = ModelFactory.createDefaultModel();
+        
+        // analyzer spec w/ no params
+                
+        spec1 = model.createResource()
+                     .addProperty(RDF.type, TextVocab.genericAnalyzer)
+                     .addProperty(TextVocab.pClass, CLASS_SIMPLE)
+                     ;
+        
+        // analyzer spec w/ empty params
+                
+        spec2 = model.createResource()
+                     .addProperty(RDF.type, TextVocab.genericAnalyzer)
+                     .addProperty(TextVocab.pClass, CLASS_FRENCH)
+                     .addProperty(TextVocab.pParams,
+                                  model.createList(
+                                          new RDFNode[] { } )
+                                  )
+                     ;
+        
+        // analyzer spec w/ one set param
+                
+        spec3 = model.createResource()
+                     .addProperty(RDF.type, TextVocab.genericAnalyzer)
+                     .addProperty(TextVocab.pClass, CLASS_FRENCH)
+                     .addProperty(TextVocab.pParams,
+                                  model.createList(
+                                          new RDFNode[] { 
+                                                  model.createResource()
+                                                  .addProperty(TextVocab.pParamName, "stopWords")
+                                                  .addProperty(TextVocab.pParamType, PARAM_TYPE_SET)
+                                                  .addProperty(TextVocab.pParamValue, strs2list(model, "les le du"))
+                                          }))
+                     ;
+    }
+    
+    private static Resource strs2list(Model model, String string) {
+        String[] members = string.split("\\s");
+        Resource current = RDF.nil;
+        for (int i = members.length-1; i>=0; i--) {
+            Resource previous = current;
+            current = model.createResource();
+            current.addProperty(RDF.rest, previous);
+            current.addProperty(RDF.first, members[i]);            
+        }
+        return current;    
+    }
+}

From d2f0561b99c957658261b3693e4a89892369a65a Mon Sep 17 00:00:00 2001
From: Chris Tomlinson <ct@moonvine.org>
Date: Sat, 22 Apr 2017 12:29:04 -0500
Subject: [PATCH 05/13] added parameters of type
 org.apache.lucene.analysis.Analyzer

---
 .../assembler/GenericAnalyzerAssembler.java   | 61 +++++++++++++++----
 1 file changed, 50 insertions(+), 11 deletions(-)

diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java
index 0fa706efcb7..853fcb614d7 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java
@@ -48,6 +48,7 @@
  *     file      java.io.FileReader
  *     int       int
  *     boolean   boolean
+ *     analyzer  org.apache.lucene.analysis.Analyzer
  * </pre>
  * 
  * Although the list of types is not exhaustive it is a simple matter
@@ -83,7 +84,7 @@
  * A parameter of type <code>string</code>, <code>file</code>, <code>boolean</code>, or 
  * <code>int</code> <i>must have</i> a single <code>text:paramValue</code> of the appropriate type.
  * <p>
- * Example:
+ * Examples:
  * <pre>
     text:map (
          [ text:field "text" ; 
@@ -101,6 +102,23 @@
                     )
            ] .
  * </pre>
+ * <pre>
+    text:map (
+         [ text:field "text" ; 
+           text:predicate rdfs:label;
+           text:analyzer [
+               a text:GenericAnalyzer ;
+               text:class "org.apache.lucene.analysis.shingle.ShingleAnalyzerWrapper" ;
+               text:params (
+                    [ text:paramName "defaultAnalyzer" ;
+                      text:paramType "analyzer" ;
+                      text:paramValue [ a text:SimpleAnalyzer ] ]
+                    [ text:paramName "maxShingleSize" ;
+                      text:paramType "int" ;
+                      text:paramValue 3 ]
+                    )
+           ] .
+ * </pre>
  */
 public class GenericAnalyzerAssembler extends AssemblerBase {
     /*
@@ -121,7 +139,14 @@ public class GenericAnalyzerAssembler extends AssemblerBase {
            ] .
      */
 
-	@Override
+    public static final String TYPE_ANALYZER = "analyzer";
+    public static final String TYPE_BOOL = "boolean";
+    public static final String TYPE_FILE = "file";
+    public static final String TYPE_INT = "int";
+    public static final String TYPE_SET = "set";
+    public static final String TYPE_STRING = "string";
+
+    @Override
 	public Analyzer open(Assembler a, Resource root, Mode mode) {
 	    if (root.hasProperty(TextVocab.pClass)) {
 	        // text:class is expected to be a string literal
@@ -242,7 +267,7 @@ private ParamSpec getParamSpec(Resource node) {
         switch (type) {
 
         // String
-        case "string": {
+        case TYPE_STRING: {
             if (value == null) {
                 throw new TextIndexException("Value for string param: " + name + " must not be empty!");
             }
@@ -250,8 +275,8 @@ private ParamSpec getParamSpec(Resource node) {
             return new ParamSpec(name, value, String.class);
         }
         
-        // "java.io.FileReader":
-        case "file": {
+        // java.io.FileReader
+        case TYPE_FILE: {
 
             if (value == null) {
                 throw new TextIndexException("Value for file param must exist and must contain a file name.");
@@ -267,8 +292,8 @@ private ParamSpec getParamSpec(Resource node) {
             }
         }
         
-        // "org.apache.lucene.analysis.util.CharArraySet":
-        case "set": {
+        // org.apache.lucene.analysis.util.CharArraySet
+        case TYPE_SET: {
             if (valueStmt == null) {
                 throw new TextIndexException("A set param spec must have a text:paramValue:" + node);
             }
@@ -283,8 +308,8 @@ private ParamSpec getParamSpec(Resource node) {
             return new ParamSpec(name, new CharArraySet(values, false), CharArraySet.class);
         }
         
-        // "int":
-        case "int":
+        // int
+        case TYPE_INT:
             if (value == null) {
                 throw new TextIndexException("Value for int param: " + name + " must not be empty!");
             }
@@ -292,8 +317,8 @@ private ParamSpec getParamSpec(Resource node) {
             int n = ((Literal) valueStmt.getObject()).getInt();
             return new ParamSpec(name, n, int.class);
 
-        // "boolean":
-        case "boolean":
+        // boolean
+        case TYPE_BOOL:
             if (value == null) {
                 throw new TextIndexException("Value for boolean param: " + name + " must not be empty!");
             }
@@ -301,6 +326,20 @@ private ParamSpec getParamSpec(Resource node) {
             boolean b = ((Literal) valueStmt.getObject()).getBoolean();
             return new ParamSpec(name, b, boolean.class);
         
+        // org.apache.lucene.analysis.Analyzer
+        case TYPE_ANALYZER:
+            if (valueStmt == null) {
+                throw new TextIndexException("Analyzer param spec must have a text:paramValue:" + node);
+            }
+            
+            RDFNode valueNode = valueStmt.getObject();
+            if (!valueNode.isResource()) {
+                throw new TextIndexException("Analyzer param spec text:paramValue must be an analyzer spec resource: " + valueNode);
+            }
+            
+            Analyzer analyzer = (Analyzer) Assembler.general.open((Resource) valueNode);
+            return new ParamSpec(name, analyzer, Analyzer.class);
+        
         default:
             // there was no match
             Log.error(this, "Unknown parameter type: " + type + " for param: " + name + " with value: " + value);

From 94b41be7553a4f955c0e41c868d94662bdd7236e Mon Sep 17 00:00:00 2001
From: Chris Tomlinson <ct@moonvine.org>
Date: Sat, 22 Apr 2017 12:29:47 -0500
Subject: [PATCH 06/13] added more tests

---
 .../TestGenericAnalyzerAssembler.java         | 121 ++++++++++++++++--
 jena-text/testing/some-stop-words.txt         |   6 +
 2 files changed, 118 insertions(+), 9 deletions(-)
 create mode 100644 jena-text/testing/some-stop-words.txt

diff --git a/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestGenericAnalyzerAssembler.java b/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestGenericAnalyzerAssembler.java
index e2a48938b2f..2ddfa31e959 100644
--- a/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestGenericAnalyzerAssembler.java
+++ b/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestGenericAnalyzerAssembler.java
@@ -20,21 +20,22 @@
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.core.KeywordAnalyzer;
 import org.apache.lucene.analysis.core.SimpleAnalyzer;
+import org.apache.lucene.analysis.core.StopAnalyzer;
 import org.apache.lucene.analysis.fr.FrenchAnalyzer;
+import org.apache.lucene.analysis.shingle.ShingleAnalyzerWrapper;
 import org.junit.AfterClass;
 import org.junit.BeforeClass;
 import org.junit.Test;
 
 public class TestGenericAnalyzerAssembler {
-    
-//    // Suppress warnings
-//    @BeforeClass public static void beforeClass() { LogCtl.setError(EntityDefinitionAssembler.class); }
-//    @AfterClass  public static void afterClass()  { LogCtl.setInfo(EntityDefinitionAssembler.class); }
 
     private static final String TESTBASE = "http://example.org/test/";
     private static final Resource spec1;
     private static final Resource spec2;
     private static final Resource spec3;
+    private static final Resource spec4;
+    private static final Resource spec5;
+    private static final Resource spec6;
     
     @Test public void AnalyzerNullaryCtor() {
         GenericAnalyzerAssembler gaAssem = new GenericAnalyzerAssembler();
@@ -54,15 +55,38 @@ public class TestGenericAnalyzerAssembler {
         assertEquals(FrenchAnalyzer.class, analyzer.getClass());
     }
     
+    @Test public void AnalyzerCtorAnalyzerInt() {
+        // spec4 names ShingleAnalyzerWrapper with an analyzer param followed by an int param
+        Analyzer built = new GenericAnalyzerAssembler().open(null, spec4, null);
+        assertEquals(ShingleAnalyzerWrapper.class, built.getClass());
+    }
+    
+    @Test public void AnalyzerCtorShingle7() {
+        // spec5 names ShingleAnalyzerWrapper with seven params of mixed types
+        Analyzer built = new GenericAnalyzerAssembler().open(null, spec5, null);
+        assertEquals(ShingleAnalyzerWrapper.class, built.getClass());
+    }
+    
+    @Test public void AnalyzerCtorFile() {
+        // spec6 names StopAnalyzer with a single file param pointing at FILE_STOPS
+        Analyzer built = new GenericAnalyzerAssembler().open(null, spec6, null);
+        assertEquals(StopAnalyzer.class, built.getClass());
+    }
+    
     
     private static final String CLASS_SIMPLE = "org.apache.lucene.analysis.core.SimpleAnalyzer";
     private static final String CLASS_FRENCH = "org.apache.lucene.analysis.fr.FrenchAnalyzer";
+    private static final String CLASS_SHINGLE = "org.apache.lucene.analysis.shingle.ShingleAnalyzerWrapper";
+    private static final String CLASS_STOP = "org.apache.lucene.analysis.core.StopAnalyzer";
+    
+    private static final String FILE_STOPS = "testing/some-stop-words.txt";
     
-    private static final String PARAM_TYPE_BOOL = "boolean";
-    private static final String PARAM_TYPE_FILE = "file";
-    private static final String PARAM_TYPE_INT = "int";
-    private static final String PARAM_TYPE_SET = "set";
-    private static final String PARAM_TYPE_STRING = "string";
+    private static final String PARAM_TYPE_ANALYZER = GenericAnalyzerAssembler.TYPE_ANALYZER;
+    private static final String PARAM_TYPE_BOOL = GenericAnalyzerAssembler.TYPE_BOOL;
+    private static final String PARAM_TYPE_FILE = GenericAnalyzerAssembler.TYPE_FILE;
+    private static final String PARAM_TYPE_INT = GenericAnalyzerAssembler.TYPE_INT;
+    private static final String PARAM_TYPE_SET = GenericAnalyzerAssembler.TYPE_SET;
+    private static final String PARAM_TYPE_STRING = GenericAnalyzerAssembler.TYPE_STRING;
     
     static {
         TextAssembler.init();
@@ -100,6 +124,85 @@ public class TestGenericAnalyzerAssembler {
                                                   .addProperty(TextVocab.pParamValue, strs2list(model, "les le du"))
                                           }))
                      ;
+        
+        // analyzer spec w/ analyzer param and int
+                
+        spec4 = model.createResource()
+                     .addProperty(RDF.type, TextVocab.genericAnalyzer)
+                     .addProperty(TextVocab.pClass, CLASS_SHINGLE)
+                     .addProperty(TextVocab.pParams,
+                                  model.createList(
+                                          new RDFNode[] { 
+                                                  model.createResource()
+                                                  .addProperty(TextVocab.pParamName, "defaultAnalyzer")
+                                                  .addProperty(TextVocab.pParamType, PARAM_TYPE_ANALYZER)
+                                                  .addProperty(TextVocab.pParamValue, 
+                                                               model.createResource()
+                                                               .addProperty(RDF.type, TextVocab.simpleAnalyzer)
+                                                               ),
+                                                  model.createResource()
+                                                  .addProperty(TextVocab.pParamName, "maxShingleSize")
+                                                  .addProperty(TextVocab.pParamType, PARAM_TYPE_INT)
+                                                  .addLiteral(TextVocab.pParamValue, 3)
+                                          }))
+                     ;
+        
+        // analyzer spec w/ seven params of mixed types
+                
+        spec5 = model.createResource()
+                     .addProperty(RDF.type, TextVocab.genericAnalyzer)
+                     .addProperty(TextVocab.pClass, CLASS_SHINGLE)
+                     .addProperty(TextVocab.pParams,
+                                  model.createList(
+                                          new RDFNode[] { 
+                                                  model.createResource()
+                                                  .addProperty(TextVocab.pParamName, "delegate")
+                                                  .addProperty(TextVocab.pParamType, PARAM_TYPE_ANALYZER)
+                                                  .addProperty(TextVocab.pParamValue, 
+                                                               model.createResource()
+                                                               .addProperty(RDF.type, TextVocab.simpleAnalyzer)
+                                                               ) ,
+                                                  model.createResource()
+                                                  .addProperty(TextVocab.pParamName, "minShingleSize")
+                                                  .addProperty(TextVocab.pParamType, PARAM_TYPE_INT)
+                                                  .addLiteral(TextVocab.pParamValue, 2) ,
+                                                  model.createResource()
+                                                  .addProperty(TextVocab.pParamName, "maxShingleSize")
+                                                  .addProperty(TextVocab.pParamType, PARAM_TYPE_INT)
+                                                  .addLiteral(TextVocab.pParamValue, 4) ,
+                                                  model.createResource()
+                                                  .addProperty(TextVocab.pParamName, "tokenSeparator")
+                                                  .addProperty(TextVocab.pParamType, PARAM_TYPE_STRING)
+                                                  .addLiteral(TextVocab.pParamValue, "|") ,
+                                                  model.createResource()
+                                                  .addProperty(TextVocab.pParamName, "outputUnigrams")
+                                                  .addProperty(TextVocab.pParamType, PARAM_TYPE_BOOL)
+                                                  .addLiteral(TextVocab.pParamValue, false) ,
+                                                  model.createResource()
+                                                  .addProperty(TextVocab.pParamName, "outputUnigramsIfNoShingles")
+                                                  .addProperty(TextVocab.pParamType, PARAM_TYPE_BOOL)
+                                                  .addLiteral(TextVocab.pParamValue, true) ,
+                                                  model.createResource()
+                                                  .addProperty(TextVocab.pParamName, "fillerToken")
+                                                  .addProperty(TextVocab.pParamType, PARAM_TYPE_STRING)
+                                                  .addLiteral(TextVocab.pParamValue, "foo")
+                                          }))
+                     ;
+        
+        // analyzer spec w/ one file param
+                
+        spec6 = model.createResource()
+                     .addProperty(RDF.type, TextVocab.genericAnalyzer)
+                     .addProperty(TextVocab.pClass, CLASS_STOP)
+                     .addProperty(TextVocab.pParams,
+                                  model.createList(
+                                          new RDFNode[] { 
+                                                  model.createResource()
+                                                  .addProperty(TextVocab.pParamName, "stopWords")
+                                                  .addProperty(TextVocab.pParamType, PARAM_TYPE_FILE)
+                                                  .addProperty(TextVocab.pParamValue, FILE_STOPS)
+                                          }))
+                     ;
     }
     
     private static Resource strs2list(Model model, String string) {
diff --git a/jena-text/testing/some-stop-words.txt b/jena-text/testing/some-stop-words.txt
new file mode 100644
index 00000000000..e648d66f2bb
--- /dev/null
+++ b/jena-text/testing/some-stop-words.txt
@@ -0,0 +1,6 @@
+foo
+bar
+baz
+flip
+flop
+mop
\ No newline at end of file

From 57ded6a9c1f7d275de4f8e6294611a869407534d Mon Sep 17 00:00:00 2001
From: Chris Tomlinson <ct@moonvine.org>
Date: Sat, 22 Apr 2017 16:15:58 -0500
Subject: [PATCH 07/13] ignore: organize imports

---
 .../assembler/TestGenericAnalyzerAssembler.java    | 14 --------------
 1 file changed, 14 deletions(-)

diff --git a/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestGenericAnalyzerAssembler.java b/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestGenericAnalyzerAssembler.java
index 2ddfa31e959..87c5d75f161 100644
--- a/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestGenericAnalyzerAssembler.java
+++ b/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestGenericAnalyzerAssembler.java
@@ -1,35 +1,21 @@
 package org.apache.jena.query.text.assembler;
 
 import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
 
-import java.util.List;
-
-import org.apache.jena.assembler.Assembler;
-import org.apache.jena.atlas.logging.Log;
-import org.apache.jena.atlas.logging.LogCtl;
-import org.apache.jena.query.text.EntityDefinition;
-import org.apache.jena.query.text.TextIndexLucene;
 import org.apache.jena.rdf.model.Model;
 import org.apache.jena.rdf.model.ModelFactory;
-import org.apache.jena.rdf.model.Property;
 import org.apache.jena.rdf.model.RDFNode;
 import org.apache.jena.rdf.model.Resource;
 import org.apache.jena.vocabulary.RDF;
-import org.apache.jena.vocabulary.RDFS;
 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.core.KeywordAnalyzer;
 import org.apache.lucene.analysis.core.SimpleAnalyzer;
 import org.apache.lucene.analysis.core.StopAnalyzer;
 import org.apache.lucene.analysis.fr.FrenchAnalyzer;
 import org.apache.lucene.analysis.shingle.ShingleAnalyzerWrapper;
-import org.junit.AfterClass;
-import org.junit.BeforeClass;
 import org.junit.Test;
 
 public class TestGenericAnalyzerAssembler {
 
-    private static final String TESTBASE = "http://example.org/test/";
     private static final Resource spec1;
     private static final Resource spec2;
     private static final Resource spec3;

From a3bb8e41aeaf9be3540cf0a6be84cd9dc9b43b28 Mon Sep 17 00:00:00 2001
From: Chris Tomlinson <ct@moonvine.org>
Date: Sat, 22 Apr 2017 16:31:54 -0500
Subject: [PATCH 08/13] added analyzer definitions: 1) DefinedAnalyzers for use
 in text:map; 2) add analyzers to Multilingual support based on BCP47 codes

---
 .../apache/jena/query/text/analyzer/Util.java |   4 +
 .../assembler/DefinedAnalyzerAssembler.java   | 100 ++++++++++++++++++
 .../query/text/assembler/TextAssembler.java   |   1 +
 .../assembler/TextIndexLuceneAssembler.java   |  12 +++
 .../jena/query/text/assembler/TextVocab.java  |   5 +
 5 files changed, 122 insertions(+)
 create mode 100644 jena-text/src/main/java/org/apache/jena/query/text/assembler/DefinedAnalyzerAssembler.java

diff --git a/jena-text/src/main/java/org/apache/jena/query/text/analyzer/Util.java b/jena-text/src/main/java/org/apache/jena/query/text/analyzer/Util.java
index fb2582a5b08..20c757388a4 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/analyzer/Util.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/analyzer/Util.java
@@ -51,6 +51,10 @@ public static Analyzer getLocalizedAnalyzer(String lang) {
             return null;
         }
     }
+    
+    public static void addAnalyzer(String lang, Analyzer analyzer) {
+        cache.put(lang, analyzer); // register analyzer under key lang; presumably the cache getLocalizedAnalyzer(lang) reads - confirm
+    }
 
     private static void initAnalyzerDefs() {
         analyzersClasses = new Hashtable<>();
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/DefinedAnalyzerAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/DefinedAnalyzerAssembler.java
new file mode 100644
index 00000000000..e7bd941ff2c
--- /dev/null
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/DefinedAnalyzerAssembler.java
@@ -0,0 +1,100 @@
+package org.apache.jena.query.text.assembler;
+
+import java.util.Hashtable;
+
+import org.apache.jena.assembler.Assembler;
+import org.apache.jena.assembler.Mode;
+import org.apache.jena.assembler.assemblers.AssemblerBase;
+import org.apache.jena.query.text.TextIndexException;
+import org.apache.jena.query.text.analyzer.Util;
+import org.apache.jena.rdf.model.RDFNode;
+import org.apache.jena.rdf.model.Resource;
+import org.apache.jena.rdf.model.Statement;
+import org.apache.jena.vocabulary.RDF;
+import org.apache.lucene.analysis.Analyzer;
+
+public class DefinedAnalyzerAssembler extends AssemblerBase {
+    /** Analyzers registered via {@link #addAnalyzer}, keyed by the resource that names them. */
+    private static final Hashtable<Resource, Analyzer> analyzers = new Hashtable<>();
+
+    /** Registers <code>analyzer</code> under <code>key</code> for later lookup by {@link #open}. */
+    public static void addAnalyzer(Resource key, Analyzer analyzer) {
+        analyzers.put(key, analyzer);
+    }
+
+    /**
+     * Walks the RDF list <code>list</code>. Every element must carry a
+     * <code>text:analyzer</code> spec, which is opened with <code>a</code>; the
+     * result is passed to {@link Util#addAnalyzer} when <code>text:addLang</code>
+     * is present and to {@link #addAnalyzer} when <code>text:defineAnalyzer</code> is.
+     *
+     * @return true if at least one element carried <code>text:addLang</code>
+     * @throws TextIndexException if the list or one of its elements is malformed
+     */
+    public static boolean addAnalyzers(Assembler a, Resource list) {
+        Resource current = list;
+        boolean isMultilingualSupport = false;
+
+        while (current != null && ! current.equals(RDF.nil)) {
+            Statement firstStmt = current.getProperty(RDF.first);
+            if (firstStmt == null) {
+                throw new TextIndexException("text:defineAnalyzers list not well formed: " + current);
+            }
+            RDFNode first = firstStmt.getObject();
+            if (! first.isResource()) {
+                throw new TextIndexException("text:defineAnalyzers list element must be a resource : " + first);
+            }
+
+            // process the current list element to add an analyzer
+            Resource adding = (Resource) first;
+            if (! adding.hasProperty(TextVocab.pAnalyzer)) {
+                throw new TextIndexException("text:analyzer property is required when adding an analyzer: " + adding);
+            }
+            RDFNode analyzerNode = adding.getProperty(TextVocab.pAnalyzer).getObject();
+            if (! analyzerNode.isResource()) {
+                throw new TextIndexException("addAnalyzers text:analyzer must be an analyzer spec resource: " + analyzerNode);
+            }
+            Analyzer analyzer = (Analyzer) a.open((Resource) analyzerNode);
+
+            if (adding.hasProperty(TextVocab.pAddLang)) {
+                String langCode = adding.getProperty(TextVocab.pAddLang).getString();
+                Util.addAnalyzer(langCode, analyzer);
+                isMultilingualSupport = true;
+            }
+
+            if (adding.hasProperty(TextVocab.pDefAnalyzer)) {
+                Resource id = adding.getProperty(TextVocab.pDefAnalyzer).getResource();
+                // getURI() is null for a blank node, which the definition key must not be
+                if (id.getURI() == null) {
+                    throw new TextIndexException("addAnalyzers text:defineAnalyzer property must be a non-blank resource: " + adding);
+                }
+                addAnalyzer(id, analyzer);
+            }
+
+            Statement restStmt = current.getProperty(RDF.rest);
+            if (restStmt == null) {
+                throw new TextIndexException("text:defineAnalyzers list not terminated by rdf:nil");
+            }
+            RDFNode rest = restStmt.getObject();
+            if (! rest.isResource()) {
+                throw new TextIndexException("text:defineAnalyzers list node is not a resource : " + rest);
+            }
+            current = (Resource) rest;
+        }
+
+        return isMultilingualSupport;
+    }
+
+    /**
+     * Returns the analyzer registered under the resource named by
+     * <code>text:useAnalyzer</code> on <code>root</code>, or null when that
+     * property is absent or nothing has been registered under the resource.
+     */
+    @Override
+    public Object open(Assembler a, Resource root, Mode mode) {
+        if (root.hasProperty(TextVocab.pUseAnalyzer)) {
+            return analyzers.get(root.getProperty(TextVocab.pUseAnalyzer).getResource());
+        }
+        return null;
+    }
+}
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextAssembler.java
index 45f5cee7cec..6cbb2dae00e 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextAssembler.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextAssembler.java
@@ -38,6 +38,7 @@ public static void init()
         Assembler.general.implementWith(TextVocab.localizedAnalyzer, new LocalizedAnalyzerAssembler()) ;
         Assembler.general.implementWith(TextVocab.configurableAnalyzer, new ConfigurableAnalyzerAssembler()) ;
         Assembler.general.implementWith(TextVocab.genericAnalyzer,  new GenericAnalyzerAssembler()) ;
+        Assembler.general.implementWith(TextVocab.definedAnalyzer,  new DefinedAnalyzerAssembler()) ;
 
     }
 }
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java
index 0ec1e5b8350..7acfb9eb47c 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java
@@ -115,6 +115,18 @@ public TextIndex open(Assembler a, Resource root, Mode mode) {
                 }
                 isMultilingualSupport = mlsNode.asLiteral().getBoolean();
             }
+            
+            Statement defAnalyzersStatement = root.getProperty(pDefAnalyzers);
+            if (null != defAnalyzersStatement) {
+                RDFNode aNode = defAnalyzersStatement.getObject();
+                if (! aNode.isResource()) {
+                    throw new TextIndexException("text:defineAnalyzers property is not a resource : " + aNode);
+                }
+                boolean addedLangs = DefinedAnalyzerAssembler.addAnalyzers(a, (Resource) aNode);
+                if (addedLangs) {
+                    isMultilingualSupport = true;
+                }
+            }
 
             boolean storeValues = false;
             Statement storeValuesStatement = root.getProperty(pStoreValues);
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
index cd1844d2cd3..b051252db95 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
@@ -88,10 +88,15 @@ public class TextVocab
 
     //GenericAnalyzer
     public static final Resource genericAnalyzer    = Vocab.resource(NS, "GenericAnalyzer");
+    public static final Resource definedAnalyzer    = Vocab.resource(NS, "DefinedAnalyzer");
     public static final Property pClass             = Vocab.property(NS, "class");
     public static final Property pParams            = Vocab.property(NS, "params");
     public static final Property pParamName         = Vocab.property(NS, "paramName");
     public static final Property pParamType         = Vocab.property(NS, "paramType");
     public static final Property pParamValue        = Vocab.property(NS, "paramValue");
+    public static final Property pDefAnalyzers      = Vocab.property(NS, "defineAnalyzers");
+    public static final Property pDefAnalyzer       = Vocab.property(NS, "defineAnalyzer");
+    public static final Property pAddLang           = Vocab.property(NS, "addLang");
+    public static final Property pUseAnalyzer       = Vocab.property(NS, "useAnalyzer");
 }
 

From 311efab2fd26a58406b29b64d74b41039292d080 Mon Sep 17 00:00:00 2001
From: Chris Tomlinson <ct@moonvine.org>
Date: Sun, 23 Apr 2017 09:18:35 -0500
Subject: [PATCH 09/13] represent parameter types as resources like
 text:TypeSet instead of literal string

---
 .../assembler/GenericAnalyzerAssembler.java   | 208 +++++++++---------
 .../jena/query/text/assembler/TextVocab.java  |   6 +
 .../TestGenericAnalyzerAssembler.java         |  29 +--
 3 files changed, 125 insertions(+), 118 deletions(-)

diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java
index 853fcb614d7..4f10b85e5bf 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java
@@ -43,12 +43,12 @@
  * <p>
  * The parameters may be of the following types:
  * <pre>
- *     string    String
- *     set       org.apache.lucene.analysis.util.CharArraySet
- *     file      java.io.FileReader
- *     int       int
- *     boolean   boolean
- *     analyzer  org.apache.lucene.analysis.Analyzer
+ *     text:TypeString    String
+ *     text:TypeSet       org.apache.lucene.analysis.util.CharArraySet
+ *     text:TypeFile      java.io.FileReader
+ *     text:TypeInt       int
+ *     text:TypeBoolean   boolean
+ *     text:TypeAnalyzer  org.apache.lucene.analysis.Analyzer
  * </pre>
  * 
  * Although the list of types is not exhaustive it is a simple matter
@@ -74,15 +74,18 @@
  * <ul>
  * <li>an optional <code>text:paramName</code> that may be used to document which 
  * parameter is represented</li>
- * <li>a <code>text:paramType</code> which is one of: <code>string</code>, 
- * <code>set</code>, <code>file</code>, <code>int</code>, <code>boolean</code>.</li>
- * <li>a text:paramValue which is an xsd:string, xsd:boolean or xsd:int.</li>
+ * <li>a <code>text:paramType</code> which is one of: <code>text:TypeString</code>, 
+ * <code>text:TypeSet</code>, <code>text:TypeFile</code>, <code>text:TypeInt</code>, 
+ * <code>text:TypeBoolean</code>, <code>text:TypeAnalyzer</code>.</li>
+ * <li>a <code>text:paramValue</code> which is an xsd:string, xsd:boolean, xsd:int or a resource.</li>
  * </ul>
  * <p>
- * A parameter of type <code>set</code> <i>must have</i> a list of zero or more <code>String</code>s.
+ * A parameter of type <code>text:TypeSet</code> <i>must have</i> a list of zero or 
+ * more <code>String</code>s.
  * <p>
- * A parameter of type <code>string</code>, <code>file</code>, <code>boolean</code>, or 
- * <code>int</code> <i>must have</i> a single <code>text:paramValue</code> of the appropriate type.
+ * A parameter of type <code>text:TypeString</code>, <code>text:TypeFile</code>, 
+ * <code>text:TypeBoolean</code>, <code>text:TypeInt</code> or <code>text:TypeAnalyzer</code> 
+ * <i>must have</i> a single <code>text:paramValue</code> of the appropriate type.
  * <p>
  * Examples:
  * <pre>
@@ -94,10 +97,10 @@
                text:class "org.apache.lucene.analysis.en.EnglishAnalyzer" ;
                text:params (
                     [ text:paramName "stopwords" ;
-                      text:paramType "set" ;
+                      text:paramType text:TypeSet ;
                       text:paramValue ("the" "a" "an") ]
                     [ text:paramName "stemExclusionSet" ;
-                      text:paramType "set" ;
+                      text:paramType text:TypeSet ;
                       text:paramValue ("ing" "ed") ]
                     )
            ] .
@@ -111,10 +114,10 @@
                text:class "org.apache.lucene.analysis.shingle.ShingleAnalyzerWrapper" ;
                text:params (
                     [ text:paramName "defaultAnalyzer" ;
-                      text:paramType "analyzer" ;
+                      text:paramType text:TypeAnalyzer ;
                       text:paramValue [ a text:SimpleAnalyzer ] ]
                     [ text:paramName "maxShingleSize" ;
-                      text:paramType "int" ;
+                      text:paramType text:TypeInt ;
                       text:paramValue 3 ]
                     )
            ] .
@@ -130,71 +133,71 @@ public class GenericAnalyzerAssembler extends AssemblerBase {
                text:class "org.apache.lucene.analysis.en.EnglishAnalyzer" ;
                text:params (
                     [ text:paramName "stopwords" ;
-                      text:paramType "set" ;
+                      text:paramType text:TypeSet ;
                       text:paramValue ("the" "a" "an") ]
                     [ text:paramName "stemExclusionSet" ;
-                      text:paramType "set" ;
+                      text:paramType text:TypeSet ;
                       text:paramValue ("ing" "ed") ]
                     )
            ] .
      */
 
-    public static final String TYPE_ANALYZER = "analyzer";
-    public static final String TYPE_BOOL = "boolean";
-    public static final String TYPE_FILE = "file";
-    public static final String TYPE_INT = "int";
-    public static final String TYPE_SET = "set";
-    public static final String TYPE_STRING = "string";
+    public static final String TYPE_ANALYZER   = "TypeAnalyzer";
+    public static final String TYPE_BOOL       = "TypeBoolean";
+    public static final String TYPE_FILE       = "TypeFile";
+    public static final String TYPE_INT        = "TypeInt";
+    public static final String TYPE_SET        = "TypeSet";
+    public static final String TYPE_STRING     = "TypeString";
 
     @Override
-	public Analyzer open(Assembler a, Resource root, Mode mode) {
-	    if (root.hasProperty(TextVocab.pClass)) {
-	        // text:class is expected to be a string literal
-	        String className = root.getProperty(TextVocab.pClass).getString();
-
-	        // is the class accessible?
-	        Class<?> clazz = null;
-	        try {
-	            clazz = Class.forName(className);
-	        } catch (ClassNotFoundException e) {
-	            Log.error(this, "Analyzer class " + className + " not found. " + e.getMessage(), e);
-	            return null;
-	        }
-
-	        // Is the class an Analyzer?
-	        if (!Analyzer.class.isAssignableFrom(clazz)) {
-	            Log.error(this, clazz.getName() + " has to be a subclass of " + Analyzer.class.getName());
-	            return null;
-	        }
-	        
-	        if (root.hasProperty(TextVocab.pParams)) {
-	            RDFNode node = root.getProperty(TextVocab.pParams).getObject();
-	            if (! node.isResource()) {
-	                throw new TextIndexException("text:params must be a list of parameter resources: " + node);
-	            }
-
-	            List<ParamSpec> specs = getParamSpecs((Resource) node);
-
-	            // split the param specs into classes and values for constructor lookup
-	            final Class<?> paramClasses[] = new Class<?>[specs.size()];
-	            final Object paramValues[] = new Object[specs.size()];
-	            for (int i = 0; i < specs.size(); i++) {
-	                ParamSpec spec = specs.get(i);
-	                paramClasses[i] = spec.getValueClass();
-	                paramValues[i] = spec.getValue();
-	            }
-
-	            // Create new analyzer
-	            return newAnalyzer(clazz, paramClasses, paramValues);
-
-	        } else {
-	            // use the nullary Analyzer constructor
-	            return newAnalyzer(clazz, new Class<?>[0], new Object[0]);
-	        }
-	    } else {
-	        throw new TextIndexException("text:class property is required by GenericAnalyzer: " + root);
-	    }
-	}
+    public Analyzer open(Assembler a, Resource root, Mode mode) {
+        if (root.hasProperty(TextVocab.pClass)) {
+            // text:class is expected to be a string literal
+            String className = root.getProperty(TextVocab.pClass).getString();
+
+            // is the class accessible?
+            Class<?> clazz = null;
+            try {
+                clazz = Class.forName(className);
+            } catch (ClassNotFoundException e) {
+                Log.error(this, "Analyzer class " + className + " not found. " + e.getMessage(), e);
+                return null;
+            }
+
+            // Is the class an Analyzer?
+            if (!Analyzer.class.isAssignableFrom(clazz)) {
+                Log.error(this, clazz.getName() + " has to be a subclass of " + Analyzer.class.getName());
+                return null;
+            }
+
+            if (root.hasProperty(TextVocab.pParams)) {
+                RDFNode node = root.getProperty(TextVocab.pParams).getObject();
+                if (! node.isResource()) {
+                    throw new TextIndexException("text:params must be a list of parameter resources: " + node);
+                }
+
+                List<ParamSpec> specs = getParamSpecs((Resource) node);
+
+                // split the param specs into classes and values for constructor lookup
+                final Class<?> paramClasses[] = new Class<?>[specs.size()];
+                final Object paramValues[] = new Object[specs.size()];
+                for (int i = 0; i < specs.size(); i++) {
+                    ParamSpec spec = specs.get(i);
+                    paramClasses[i] = spec.getValueClass();
+                    paramValues[i] = spec.getValue();
+                }
+
+                // Create new analyzer
+                return newAnalyzer(clazz, paramClasses, paramValues);
+
+            } else {
+                // use the nullary Analyzer constructor
+                return newAnalyzer(clazz, new Class<?>[0], new Object[0]);
+            }
+        } else {
+            throw new TextIndexException("text:class property is required by GenericAnalyzer: " + root);
+        }
+    }
 
     /**
      * Create instance of the Lucene Analyzer, <code>class</code>, with provided parameters
@@ -221,47 +224,52 @@ private Analyzer newAnalyzer(Class<?> clazz, Class<?>[] paramClasses, Object[] p
 
         return null;
     }
-    
+
     private List<ParamSpec> getParamSpecs(Resource list) {
         List<ParamSpec> result = new ArrayList<>();
         Resource current = list;
-        
+
         while (current != null && ! current.equals(RDF.nil)){
             Statement firstStmt = current.getProperty(RDF.first);
             if (firstStmt == null) {
                 throw new TextIndexException("parameter list not well formed: " + current);
             }
-            
+
             RDFNode first = firstStmt.getObject();
             if (! first.isResource()) {
                 throw new TextIndexException("parameter specification must be an anon resource : " + first);
             }
 
             result.add(getParamSpec((Resource) first));
-            
+
             Statement restStmt = current.getProperty(RDF.rest);
             if (restStmt == null) {
                 throw new TextIndexException("parameter list not terminated by rdf:nil");
             }
-            
+
             RDFNode rest = restStmt.getObject();
             if (! rest.isResource()) {
                 throw new TextIndexException("parameter list node is not a resource : " + rest);
             }
-            
+
             current = (Resource) rest;
         }
-        
+
         return result;
     }
-    
+
     private ParamSpec getParamSpec(Resource node) {
         Statement nameStmt = node.getProperty(TextVocab.pParamName);
         Statement typeStmt = node.getProperty(TextVocab.pParamType);
         Statement valueStmt = node.getProperty(TextVocab.pParamValue);
         
+        if (typeStmt == null) {
+            throw new TextIndexException("Parameter specification must have a text:paramType: " + node);
+        }        
+        Resource typeRes = typeStmt.getResource();
+        String type = typeRes.getLocalName();
+
         String name = getStringValue(nameStmt);
-        String type = getStringValue(typeStmt);
         String value = getStringValue(valueStmt);
 
         switch (type) {
@@ -274,7 +282,7 @@ private ParamSpec getParamSpec(Resource node) {
 
             return new ParamSpec(name, value, String.class);
         }
-        
+
         // java.io.FileReader
         case TYPE_FILE: {
 
@@ -291,23 +299,23 @@ private ParamSpec getParamSpec(Resource node) {
                 throw new TextIndexException("File " + value + " for param " + name + " not found!");
             }
         }
-        
+
         // org.apache.lucene.analysis.util.CharArraySet
         case TYPE_SET: {
             if (valueStmt == null) {
                 throw new TextIndexException("A set param spec must have a text:paramValue:" + node);
             }
-            
+
             RDFNode valueNode = valueStmt.getObject();
             if (!valueNode.isResource()) {
                 throw new TextIndexException("A set param spec text:paramValue must be a list of strings: " + valueNode);
             }
-            
+
             List<String> values = toStrings((Resource) valueNode);
 
             return new ParamSpec(name, new CharArraySet(values, false), CharArraySet.class);
         }
-        
+
         // int
         case TYPE_INT:
             if (value == null) {
@@ -317,7 +325,7 @@ private ParamSpec getParamSpec(Resource node) {
             int n = ((Literal) valueStmt.getObject()).getInt();
             return new ParamSpec(name, n, int.class);
 
-        // boolean
+            // boolean
         case TYPE_BOOL:
             if (value == null) {
                 throw new TextIndexException("Value for boolean param: " + name + " must not be empty!");
@@ -325,21 +333,21 @@ private ParamSpec getParamSpec(Resource node) {
 
             boolean b = ((Literal) valueStmt.getObject()).getBoolean();
             return new ParamSpec(name, b, boolean.class);
-        
-        // org.apache.lucene.analysis.Analyzer
+
+            // org.apache.lucene.analysis.Analyzer
         case TYPE_ANALYZER:
             if (valueStmt == null) {
                 throw new TextIndexException("Analyzer param spec must have a text:paramValue:" + node);
             }
-            
+
             RDFNode valueNode = valueStmt.getObject();
             if (!valueNode.isResource()) {
                 throw new TextIndexException("Analyzer param spec text:paramValue must be an analyzer spec resource: " + valueNode);
             }
-            
+
             Analyzer analyzer = (Analyzer) Assembler.general.open((Resource) valueNode);
             return new ParamSpec(name, analyzer, Analyzer.class);
-        
+
         default:
             // there was no match
             Log.error(this, "Unknown parameter type: " + type + " for param: " + name + " with value: " + value);
@@ -348,7 +356,7 @@ private ParamSpec getParamSpec(Resource node) {
 
         return null;
     }
-    
+
     private String getStringValue(Statement stmt) {
         if (stmt == null) {
             return null;
@@ -365,33 +373,33 @@ private String getStringValue(Statement stmt) {
     private List<String> toStrings(Resource list) {
         List<String> result = new ArrayList<>();
         Resource current = list;
-        
+
         while (current != null && ! current.equals(RDF.nil)){
             Statement firstStmt = current.getProperty(RDF.first);
             if (firstStmt == null) {
                 throw new TextIndexException("param spec of type set not well formed");
             }
-            
+
             RDFNode first = firstStmt.getObject();
             if (! first.isLiteral()) {
                 throw new TextIndexException("param spec of type set item is not a literal: " + first);
             }
-            
+
             result.add(((Literal)first).getLexicalForm());
-            
+
             Statement restStmt = current.getProperty(RDF.rest);
             if (restStmt == null) {
                 throw new TextIndexException("param spec of type set not terminated by rdf:nil");
             }
-            
+
             RDFNode rest = restStmt.getObject();
             if (! rest.isResource()) {
                 throw new TextIndexException("param spec of type set rest is not a resource: " + rest);
             }
-            
+
             current = (Resource) rest;
         }
-        
+
         return result;
     }
 
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
index b051252db95..78cf0c0ad3d 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
@@ -89,6 +89,12 @@ public class TextVocab
     //GenericAnalyzer
     public static final Resource genericAnalyzer    = Vocab.resource(NS, "GenericAnalyzer");
     public static final Resource definedAnalyzer    = Vocab.resource(NS, "DefinedAnalyzer");
+    public static final Resource typeAnalyzer       = Vocab.resource(NS, GenericAnalyzerAssembler.TYPE_ANALYZER);
+    public static final Resource typeBoolean        = Vocab.resource(NS, GenericAnalyzerAssembler.TYPE_BOOL);
+    public static final Resource typeFile           = Vocab.resource(NS, GenericAnalyzerAssembler.TYPE_FILE);
+    public static final Resource typeInt            = Vocab.resource(NS, GenericAnalyzerAssembler.TYPE_INT);
+    public static final Resource typeSet            = Vocab.resource(NS, GenericAnalyzerAssembler.TYPE_SET);
+    public static final Resource typeString         = Vocab.resource(NS, GenericAnalyzerAssembler.TYPE_STRING);
     public static final Property pClass             = Vocab.property(NS, "class");
     public static final Property pParams            = Vocab.property(NS, "params");
     public static final Property pParamName         = Vocab.property(NS, "paramName");
diff --git a/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestGenericAnalyzerAssembler.java b/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestGenericAnalyzerAssembler.java
index 87c5d75f161..3effc39bb01 100644
--- a/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestGenericAnalyzerAssembler.java
+++ b/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestGenericAnalyzerAssembler.java
@@ -67,13 +67,6 @@ public class TestGenericAnalyzerAssembler {
     
     private static final String FILE_STOPS = "testing/some-stop-words.txt";
     
-    private static final String PARAM_TYPE_ANALYZER = GenericAnalyzerAssembler.TYPE_ANALYZER;
-    private static final String PARAM_TYPE_BOOL = GenericAnalyzerAssembler.TYPE_BOOL;
-    private static final String PARAM_TYPE_FILE = GenericAnalyzerAssembler.TYPE_FILE;
-    private static final String PARAM_TYPE_INT = GenericAnalyzerAssembler.TYPE_INT;
-    private static final String PARAM_TYPE_SET = GenericAnalyzerAssembler.TYPE_SET;
-    private static final String PARAM_TYPE_STRING = GenericAnalyzerAssembler.TYPE_STRING;
-    
     static {
         TextAssembler.init();
         Model model = ModelFactory.createDefaultModel();
@@ -106,7 +99,7 @@ public class TestGenericAnalyzerAssembler {
                                           new RDFNode[] { 
                                                   model.createResource()
                                                   .addProperty(TextVocab.pParamName, "stopWords")
-                                                  .addProperty(TextVocab.pParamType, PARAM_TYPE_SET)
+                                                  .addProperty(TextVocab.pParamType, TextVocab.typeSet)
                                                   .addProperty(TextVocab.pParamValue, strs2list(model, "les le du"))
                                           }))
                      ;
@@ -121,14 +114,14 @@ public class TestGenericAnalyzerAssembler {
                                           new RDFNode[] { 
                                                   model.createResource()
                                                   .addProperty(TextVocab.pParamName, "defaultAnalyzer")
-                                                  .addProperty(TextVocab.pParamType, PARAM_TYPE_ANALYZER)
+                                                  .addProperty(TextVocab.pParamType, TextVocab.typeAnalyzer)
                                                   .addProperty(TextVocab.pParamValue, 
                                                                model.createResource()
                                                                .addProperty(RDF.type, TextVocab.simpleAnalyzer)
                                                                ),
                                                   model.createResource()
                                                   .addProperty(TextVocab.pParamName, "maxShingleSize")
-                                                  .addProperty(TextVocab.pParamType, PARAM_TYPE_INT)
+                                                  .addProperty(TextVocab.pParamType, TextVocab.typeInt)
                                                   .addLiteral(TextVocab.pParamValue, 3)
                                           }))
                      ;
@@ -143,34 +136,34 @@ public class TestGenericAnalyzerAssembler {
                                           new RDFNode[] { 
                                                   model.createResource()
                                                   .addProperty(TextVocab.pParamName, "delegate")
-                                                  .addProperty(TextVocab.pParamType, PARAM_TYPE_ANALYZER)
+                                                  .addProperty(TextVocab.pParamType, TextVocab.typeAnalyzer)
                                                   .addProperty(TextVocab.pParamValue, 
                                                                model.createResource()
                                                                .addProperty(RDF.type, TextVocab.simpleAnalyzer)
                                                                ) ,
                                                   model.createResource()
                                                   .addProperty(TextVocab.pParamName, "minShingleSize")
-                                                  .addProperty(TextVocab.pParamType, PARAM_TYPE_INT)
+                                                  .addProperty(TextVocab.pParamType, TextVocab.typeInt)
                                                   .addLiteral(TextVocab.pParamValue, 2) ,
                                                   model.createResource()
                                                   .addProperty(TextVocab.pParamName, "maxShingleSize")
-                                                  .addProperty(TextVocab.pParamType, PARAM_TYPE_INT)
+                                                  .addProperty(TextVocab.pParamType, TextVocab.typeInt)
                                                   .addLiteral(TextVocab.pParamValue, 4) ,
                                                   model.createResource()
                                                   .addProperty(TextVocab.pParamName, "tokenSeparator")
-                                                  .addProperty(TextVocab.pParamType, PARAM_TYPE_STRING)
+                                                  .addProperty(TextVocab.pParamType, TextVocab.typeString)
                                                   .addLiteral(TextVocab.pParamValue, "|") ,
                                                   model.createResource()
                                                   .addProperty(TextVocab.pParamName, "outputUnigrams")
-                                                  .addProperty(TextVocab.pParamType, PARAM_TYPE_BOOL)
+                                                  .addProperty(TextVocab.pParamType, TextVocab.typeBoolean)
                                                   .addLiteral(TextVocab.pParamValue, false) ,
                                                   model.createResource()
                                                   .addProperty(TextVocab.pParamName, "outputUnigramsIfNoShingles")
-                                                  .addProperty(TextVocab.pParamType, PARAM_TYPE_BOOL)
+                                                  .addProperty(TextVocab.pParamType, TextVocab.typeBoolean)
                                                   .addLiteral(TextVocab.pParamValue, true) ,
                                                   model.createResource()
                                                   .addProperty(TextVocab.pParamName, "fillerToken")
-                                                  .addProperty(TextVocab.pParamType, PARAM_TYPE_STRING)
+                                                  .addProperty(TextVocab.pParamType, TextVocab.typeString)
                                                   .addLiteral(TextVocab.pParamValue, "foo")
                                           }))
                      ;
@@ -185,7 +178,7 @@ public class TestGenericAnalyzerAssembler {
                                           new RDFNode[] { 
                                                   model.createResource()
                                                   .addProperty(TextVocab.pParamName, "stopWords")
-                                                  .addProperty(TextVocab.pParamType, PARAM_TYPE_FILE)
+                                                  .addProperty(TextVocab.pParamType, TextVocab.typeFile)
                                                   .addProperty(TextVocab.pParamValue, FILE_STOPS)
                                           }))
                      ;

From 5edb6c8758124fe8dd5a96d7b92949fc3ac1f61f Mon Sep 17 00:00:00 2001
From: Chris Tomlinson <ct@moonvine.org>
Date: Sun, 23 Apr 2017 10:13:09 -0500
Subject: [PATCH 10/13] factor DefinedAnalyzerAssembler and
 DefineAnalyzersAssembler into separate classes; move defined analyzer cache
 to Util alongside the language-tagged analyzers since both caches have the
 same lifetime and similar uses.

---
 .../apache/jena/query/text/analyzer/Util.java |  12 ++
 .../assembler/DefineAnalyzersAssembler.java   | 105 ++++++++++++++++++
 .../assembler/DefinedAnalyzerAssembler.java   | 103 +++++------------
 .../assembler/TextIndexLuceneAssembler.java   |  10 +-
 4 files changed, 152 insertions(+), 78 deletions(-)
 create mode 100644 jena-text/src/main/java/org/apache/jena/query/text/assembler/DefineAnalyzersAssembler.java

diff --git a/jena-text/src/main/java/org/apache/jena/query/text/analyzer/Util.java b/jena-text/src/main/java/org/apache/jena/query/text/analyzer/Util.java
index 20c757388a4..6ad0747d769 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/analyzer/Util.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/analyzer/Util.java
@@ -18,6 +18,7 @@
 
 package org.apache.jena.query.text.analyzer;
 
+import org.apache.jena.rdf.model.Resource;
 import org.apache.lucene.analysis.Analyzer;
 import java.lang.reflect.Constructor;
 import java.util.Hashtable;
@@ -26,6 +27,9 @@ public class Util {
 
     private static Hashtable<String, Class<?>> analyzersClasses; //mapping between ISO2-letter language and lucene existing analyzersClasses
     private static Hashtable<String, Analyzer> cache = new Hashtable<>(); //to avoid unnecessary multi instantiation
+    
+    // cache of defined text:defineAnalyzers
+    private static Hashtable<String, Analyzer> definedAnalyzers = new Hashtable<>();
 
     static {
         initAnalyzerDefs();
@@ -55,6 +59,14 @@ public static Analyzer getLocalizedAnalyzer(String lang) {
     public static void addAnalyzer(String lang, Analyzer analyzer) {
         cache.put(lang, analyzer);
     }
+    
+    public static Analyzer getDefinedAnalyzer(Resource key) {
+        return definedAnalyzers.get(key.getURI());
+    }
+    
+    public static void defineAnalyzer(Resource key, Analyzer analyzer) {
+        definedAnalyzers.put(key.getURI(), analyzer);
+    }
 
     private static void initAnalyzerDefs() {
         analyzersClasses = new Hashtable<>();
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/DefineAnalyzersAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/DefineAnalyzersAssembler.java
new file mode 100644
index 00000000000..11270e2bac8
--- /dev/null
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/DefineAnalyzersAssembler.java
@@ -0,0 +1,105 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.query.text.assembler;
+
+import org.apache.jena.assembler.Assembler;
+import org.apache.jena.query.text.TextIndexException;
+import org.apache.jena.query.text.analyzer.Util;
+import org.apache.jena.rdf.model.RDFNode;
+import org.apache.jena.rdf.model.Resource;
+import org.apache.jena.rdf.model.Statement;
+import org.apache.jena.vocabulary.RDF;
+import org.apache.lucene.analysis.Analyzer;
+
+public class DefineAnalyzersAssembler {
+    /*
+    <#indexLucene> a text:TextIndexLucene ;
+        text:directory <file:Lucene> ;
+        text:entityMap <#entMap> ;
+        text:defineAnalyzers (
+            [text:addLang "sa-x-iast" ;
+             text:analyzer [ . . . ]]
+            [text:defineAnalyzer <#foo> ;
+             text:analyzer [ . . . ]]
+        )
+    */
+
+    public static boolean open(Assembler a, Resource list) {
+        Resource current = list;
+        boolean isMultilingualSupport = false;
+        
+        while (current != null && ! current.equals(RDF.nil)){
+            Statement firstStmt = current.getProperty(RDF.first);
+            if (firstStmt == null) {
+                throw new TextIndexException("parameter list not well formed: " + current);
+            }
+            
+            RDFNode first = firstStmt.getObject();
+            if (! first.isResource()) {
+                throw new TextIndexException("parameter specification must be an anon resource : " + first);
+            }
+
+            // process the current list element to add an analyzer 
+            Resource adding = (Resource) first;
+            if (adding.hasProperty(TextVocab.pAnalyzer)) {
+                Statement analyzerStmt = adding.getProperty(TextVocab.pAnalyzer);
+                RDFNode analyzerNode = analyzerStmt.getObject();
+                if (!analyzerNode.isResource()) {
+                    throw new TextIndexException("addAnalyzers text:analyzer must be an analyzer spec resource: " + analyzerNode);
+                }
+                
+                Analyzer analyzer = (Analyzer) a.open((Resource) analyzerNode);
+                
+                if (adding.hasProperty(TextVocab.pAddLang)) {
+                    Statement langStmt = adding.getProperty(TextVocab.pAddLang);
+                    String langCode = langStmt.getString();
+                    Util.addAnalyzer(langCode, analyzer);
+                    isMultilingualSupport = true;
+                }
+                
+                if (adding.hasProperty(TextVocab.pDefAnalyzer)) {
+                    Statement defStmt = adding.getProperty(TextVocab.pDefAnalyzer);
+                    Resource id = defStmt.getResource();
+                    
+                    if (id.getURI() != null) {
+                        Util.defineAnalyzer(id, analyzer);
+                    } else {
+                        throw new TextIndexException("addAnalyzers text:defineAnalyzer property must be a non-blank resource: " + adding);
+                    }
+                }
+            } else {
+                throw new TextIndexException("text:analyzer property is required when adding an analyzer: " + adding);
+            }
+            
+            Statement restStmt = current.getProperty(RDF.rest);
+            if (restStmt == null) {
+                throw new TextIndexException("parameter list not terminated by rdf:nil");
+            }
+            
+            RDFNode rest = restStmt.getObject();
+            if (! rest.isResource()) {
+                throw new TextIndexException("parameter list node is not a resource : " + rest);
+            }
+            
+            current = (Resource) rest;
+        }
+        
+        return isMultilingualSupport;
+    }
+}
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/DefinedAnalyzerAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/DefinedAnalyzerAssembler.java
index e7bd941ff2c..e6909ac62e5 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/DefinedAnalyzerAssembler.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/DefinedAnalyzerAssembler.java
@@ -1,88 +1,39 @@
-package org.apache.jena.query.text.assembler;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 
-import java.util.Hashtable;
+package org.apache.jena.query.text.assembler;
 
 import org.apache.jena.assembler.Assembler;
 import org.apache.jena.assembler.Mode;
 import org.apache.jena.assembler.assemblers.AssemblerBase;
-import org.apache.jena.query.text.TextIndexException;
 import org.apache.jena.query.text.analyzer.Util;
-import org.apache.jena.rdf.model.RDFNode;
 import org.apache.jena.rdf.model.Resource;
 import org.apache.jena.rdf.model.Statement;
-import org.apache.jena.vocabulary.RDF;
-import org.apache.lucene.analysis.Analyzer;
 
 public class DefinedAnalyzerAssembler extends AssemblerBase {
-    
-    private static Hashtable<Resource, Analyzer> analyzers = new Hashtable<>();
-    
-    public static void addAnalyzer(Resource key, Analyzer analyzer) {
-        analyzers.put(key, analyzer);
-    }
-    
-    public static boolean addAnalyzers(Assembler a, Resource list) {
-        Resource current = list;
-        boolean isMultilingualSupport = false;
-        
-        while (current != null && ! current.equals(RDF.nil)){
-            Statement firstStmt = current.getProperty(RDF.first);
-            if (firstStmt == null) {
-                throw new TextIndexException("parameter list not well formed: " + current);
-            }
-            
-            RDFNode first = firstStmt.getObject();
-            if (! first.isResource()) {
-                throw new TextIndexException("parameter specification must be an anon resource : " + first);
-            }
-
-            // process the current list element to add an analyzer 
-            Resource adding = (Resource) first;
-            if (adding.hasProperty(TextVocab.pAnalyzer)) {
-                Statement analyzerStmt = adding.getProperty(TextVocab.pAnalyzer);
-                RDFNode analyzerNode = analyzerStmt.getObject();
-                if (!analyzerNode.isResource()) {
-                    throw new TextIndexException("addAnalyzers text:analyzer must be an analyzer spec resource: " + analyzerNode);
-                }
-                
-                Analyzer analyzer = (Analyzer) a.open((Resource) analyzerNode);
-                
-                if (adding.hasProperty(TextVocab.pAddLang)) {
-                    Statement langStmt = adding.getProperty(TextVocab.pAddLang);
-                    String langCode = langStmt.getString();
-                    Util.addAnalyzer(langCode, analyzer);
-                    isMultilingualSupport = true;
-                }
-                
-                if (adding.hasProperty(TextVocab.pDefAnalyzer)) {
-                    Statement defStmt = adding.getProperty(TextVocab.pDefAnalyzer);
-                    Resource id = defStmt.getResource();
-                    
-                    if (id.getURI() != null) {
-                        DefinedAnalyzerAssembler.addAnalyzer(id, analyzer);
-                    } else {
-                        throw new TextIndexException("addAnalyzers text:defineAnalyzer property must be a non-blank resource: " + adding);
-                    }
-                }
-            } else {
-                throw new TextIndexException("text:analyzer property is required when adding an analyzer: " + adding);
-            }
-            
-            Statement restStmt = current.getProperty(RDF.rest);
-            if (restStmt == null) {
-                throw new TextIndexException("parameter list not terminated by rdf:nil");
-            }
-            
-            RDFNode rest = restStmt.getObject();
-            if (! rest.isResource()) {
-                throw new TextIndexException("parameter list node is not a resource : " + rest);
-            }
-            
-            current = (Resource) rest;
-        }
-        
-        return isMultilingualSupport;
-    }
+    /*
+    text:map (
+         [ text:field "text" ; 
+           text:predicate rdfs:label;
+           text:analyzer [
+               a text:DefinedAnalyzer ;
+               text:useAnalyzer <#Foo> ]
+     */
    
     @Override
     public Object open(Assembler a, Resource root, Mode mode) {
@@ -91,7 +42,7 @@ public Object open(Assembler a, Resource root, Mode mode) {
             Statement useStmt = root.getProperty(TextVocab.pUseAnalyzer);
             Resource key = useStmt.getResource();
             
-            return analyzers.get(key);
+            return Util.getDefinedAnalyzer(key);
         }
         
         return null;
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java
index 7acfb9eb47c..14af9bf9d9b 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java
@@ -26,6 +26,7 @@
 import org.apache.jena.assembler.assemblers.AssemblerBase ;
 import org.apache.jena.atlas.io.IO ;
 import org.apache.jena.atlas.lib.IRILib ;
+import org.apache.jena.atlas.logging.Log;
 import org.apache.jena.query.text.*;
 import org.apache.jena.rdf.model.RDFNode ;
 import org.apache.jena.rdf.model.Resource ;
@@ -120,10 +121,15 @@ public TextIndex open(Assembler a, Resource root, Mode mode) {
             if (null != defAnalyzersStatement) {
                 RDFNode aNode = defAnalyzersStatement.getObject();
                 if (! aNode.isResource()) {
-                    throw new TextIndexException("text:defineAnalyzers property is not a resource : " + aNode);
+                    throw new TextIndexException("text:defineAnalyzers property is not a resource (list) : " + aNode);
                 }
-                boolean addedLangs = DefinedAnalyzerAssembler.addAnalyzers(a, (Resource) aNode);
+                boolean addedLangs = DefineAnalyzersAssembler.open(a, (Resource) aNode);
+                // If text:defineAnalyzers registered any analyzers for language tags, ensure
+                // that multilingual support is enabled.
                 if (addedLangs) {
+                    if (!isMultilingualSupport) {
+                        Log.warn(this,  "Multilingual support implicitly enabled by text:defineAnalyzers");
+                    }
                     isMultilingualSupport = true;
                 }
             }

From fef4d22faeda09159cc2523e477571d1d23a85e7 Mon Sep 17 00:00:00 2001
From: Chris Tomlinson <ct@moonvine.org>
Date: Sun, 23 Apr 2017 11:20:53 -0500
Subject: [PATCH 11/13] ignore extras

---
 jena-text/.gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/jena-text/.gitignore b/jena-text/.gitignore
index e69de29bb2d..f7b49ee3ae3 100644
--- a/jena-text/.gitignore
+++ b/jena-text/.gitignore
@@ -0,0 +1 @@
+/text-query.mdtext

From 76c3ae3e9f2f35303914258d92eba47eae8e8a75 Mon Sep 17 00:00:00 2001
From: Chris Tomlinson <ct@moonvine.org>
Date: Wed, 28 Jun 2017 08:22:52 -0500
Subject: [PATCH 12/13] delete text-query.mdtext; update .gitignore; add
 license and JenaSystem.init()

---
 jena-text/.gitignore                          |  2 +-
 .../TestGenericAnalyzerAssembler.java         | 20 +++++++++++++++++++
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/jena-text/.gitignore b/jena-text/.gitignore
index f7b49ee3ae3..6c7b69a0156 100644
--- a/jena-text/.gitignore
+++ b/jena-text/.gitignore
@@ -1 +1 @@
-/text-query.mdtext
+.gitignore
diff --git a/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestGenericAnalyzerAssembler.java b/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestGenericAnalyzerAssembler.java
index 3effc39bb01..bcd5a122185 100644
--- a/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestGenericAnalyzerAssembler.java
+++ b/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestGenericAnalyzerAssembler.java
@@ -1,3 +1,21 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
 package org.apache.jena.query.text.assembler;
 
 import static org.junit.Assert.assertEquals;
@@ -6,6 +24,7 @@
 import org.apache.jena.rdf.model.ModelFactory;
 import org.apache.jena.rdf.model.RDFNode;
 import org.apache.jena.rdf.model.Resource;
+import org.apache.jena.system.JenaSystem;
 import org.apache.jena.vocabulary.RDF;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.core.SimpleAnalyzer;
@@ -68,6 +87,7 @@ public class TestGenericAnalyzerAssembler {
     private static final String FILE_STOPS = "testing/some-stop-words.txt";
     
     static {
+        JenaSystem.init();
         TextAssembler.init();
         Model model = ModelFactory.createDefaultModel();
         

From ece2f41d8a381ee961dc61e8fafd66f5925bb0fa Mon Sep 17 00:00:00 2001
From: Chris Tomlinson <ct@moonvine.org>
Date: Wed, 28 Jun 2017 09:36:29 -0500
Subject: [PATCH 13/13] added JenaSystem.init() to other jena-text tests

---
 .../jena/query/text/assembler/TestEntityMapAssembler.java       | 2 ++
 .../jena/query/text/assembler/TestTextDatasetAssembler.java     | 2 ++
 .../jena/query/text/assembler/TestTextIndexLuceneAssembler.java | 2 ++
 3 files changed, 6 insertions(+)

diff --git a/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestEntityMapAssembler.java b/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestEntityMapAssembler.java
index cdf5dcc41e5..99a3e5645c4 100644
--- a/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestEntityMapAssembler.java
+++ b/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestEntityMapAssembler.java
@@ -32,6 +32,7 @@
 import org.apache.jena.query.text.analyzer.ConfigurableAnalyzer ;
 import org.apache.jena.query.text.analyzer.LowerCaseKeywordAnalyzer ;
 import org.apache.jena.rdf.model.* ;
+import org.apache.jena.system.JenaSystem;
 import org.apache.jena.vocabulary.RDF ;
 import org.apache.jena.vocabulary.RDFS ;
 import org.apache.lucene.analysis.core.KeywordAnalyzer ;
@@ -163,6 +164,7 @@ private static Object getOne(EntityDefinition entityDef, String field) {
     private static final Property SPEC2_PREDICATE1 = RDFS.label;
     private static final Property SPEC2_PREDICATE2 = RDFS.comment;
     static {
+        JenaSystem.init();
         TextAssembler.init();
         Model model = ModelFactory.createDefaultModel();
         
diff --git a/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestTextDatasetAssembler.java b/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestTextDatasetAssembler.java
index dbef4df3a19..d6cb2ce2f2e 100644
--- a/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestTextDatasetAssembler.java
+++ b/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestTextDatasetAssembler.java
@@ -33,6 +33,7 @@
 import org.apache.jena.sparql.core.DatasetGraph ;
 import org.apache.jena.sparql.core.Quad ;
 import org.apache.jena.sparql.core.QuadAction ;
+import org.apache.jena.system.JenaSystem;
 import org.apache.jena.tdb.assembler.AssemblerTDB ;
 import org.apache.jena.vocabulary.RDF ;
 import org.junit.Test ;
@@ -93,6 +94,7 @@ public void testCustomTextDocProducerDyadicConstructor() {
     }
 
     static {
+        JenaSystem.init();
         TextAssembler.init();
         AssemblerTDB.init();
         spec1 =
diff --git a/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestTextIndexLuceneAssembler.java b/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestTextIndexLuceneAssembler.java
index 67426615613..53d2eafb01e 100644
--- a/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestTextIndexLuceneAssembler.java
+++ b/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestTextIndexLuceneAssembler.java
@@ -21,6 +21,7 @@
 import org.apache.jena.assembler.Assembler ;
 import org.apache.jena.query.text.TextIndexLucene ;
 import org.apache.jena.rdf.model.Resource ;
+import org.apache.jena.system.JenaSystem;
 import org.apache.jena.vocabulary.RDFS ;
 import org.apache.lucene.analysis.core.KeywordAnalyzer ;
 import org.apache.lucene.store.RAMDirectory ;
@@ -99,6 +100,7 @@ public class TestTextIndexLuceneAssembler extends AbstractTestTextAssembler {
     }
 
     static {
+        JenaSystem.init();
         TextAssembler.init();
     }