From 6f214b768acb314df7785eedbea5f2f8342906d3 Mon Sep 17 00:00:00 2001 From: Walter Ray-Dulany Date: Mon, 1 Aug 2016 06:15:03 -0500 Subject: [PATCH 01/10] Updated javadocs and some other comments for user and developer --- .../pirk/schema/data/DataSchemaLoader.java | 51 +++++++++++++++---- 1 file changed, 40 insertions(+), 11 deletions(-) diff --git a/src/main/java/org/apache/pirk/schema/data/DataSchemaLoader.java b/src/main/java/org/apache/pirk/schema/data/DataSchemaLoader.java index 44f99b7a..f7c076d7 100644 --- a/src/main/java/org/apache/pirk/schema/data/DataSchemaLoader.java +++ b/src/main/java/org/apache/pirk/schema/data/DataSchemaLoader.java @@ -41,22 +41,24 @@ import org.w3c.dom.Node; import org.w3c.dom.NodeList; import org.xml.sax.SAXException; +import scala.tools.nsc.transform.patmat.Logic; /** * Class to load any data schemas specified in the properties file, 'data.schemas' *

* Schemas should be specified as follows: - * - *

- * {@code
+ *
+ * 
{@code
  * 
- *   name of the schema 
- *  
- *       element name /name>
- *       class name or type name (if Java primitive type) of the element 
- *       true or false -- whether or not the schema element is an array within the data 
- *       optional - Partitioner class for the element; defaults to primitive java type partitioner  
- *  
+ *     name of the schema 
+ *    
+ *         element name 
+ *         class name or type name (if Java primitive type) of the element 
+ *         whether or not the schema element is an array within the data.
+ *                  Set to true by including this tag with no text or the string "true" (comparison is case-insensitive).
+ *                  Omitting this tag or using any other text indicates this element is not an array.
+ *         optional - Partitioner class for the element; defaults to primitive java type partitioner 
+ *    
  * 
  * }
  * 
@@ -86,13 +88,27 @@ public class DataSchemaLoader } } + + /* Kept for compatibility */ + /** + * Initializes the static {@link DataSchemaRegistry} with a list of + * available registry names. + * @throws Exception + */ public static void initialize() throws Exception { initialize(false, null); } /* Kept for compatibility */ + /** + * Initializes the static {@link org.apache.pirk.schema.data.DataSchemaRegistry} with a list of + * available registry names. + * @param hdfs If true, specifies that the DataSchema is an hdfs file; if false, that it is a regular file. + * @param fs Used only when {@paramref hdfs} is true; the {@link org.apache.hadoop.fs.FileSystem} handle for the hdfs in which the DataSchema exists + * @throws Exception + */ public static void initialize(boolean hdfs, FileSystem fs) throws Exception { String dataSchemas = SystemConfiguration.getProperty("data.schemas", "none"); @@ -179,6 +195,13 @@ public DataSchema loadSchema(InputStream stream) throws IOException, PIRExceptio return dataSchema; } + /* + * Parses an XML document + * @param stream The input stream. + * @return A Document representing the XML document. + * @throws IOException + * @throws PIRException + */ private Document parseXMLDocument(InputStream stream) throws IOException, PIRException { Document doc; @@ -196,6 +219,12 @@ private Document parseXMLDocument(InputStream stream) throws IOException, PIRExc return doc; } + /* + * Extracts a data schema element node's contents + * @param eElement A data schema element node. + * @param schema The data schema + * @throws PIRException + */ private void extractElementNode(Element eElement, DataSchema schema) throws PIRException { // Pull out the element name and type attributes. 
@@ -203,7 +232,7 @@ private void extractElementNode(Element eElement, DataSchema schema) throws PIRE String type = eElement.getElementsByTagName("type").item(0).getTextContent().trim(); schema.getTypeMap().put(name, type); - // An absent isArray means false, and an empty isArray means true, otherwise take the value. + // An empty isArray or one whose value evaluates to "true" is an array; otherwise (including absence) the element is not an array Node isArrayNode = eElement.getElementsByTagName("isArray").item(0); if (isArrayNode != null) { From 7491376dd5e161b20fa6221c9ac06c7dd3575f74 Mon Sep 17 00:00:00 2001 From: Walter Ray-Dulany Date: Mon, 1 Aug 2016 10:35:37 -0500 Subject: [PATCH 02/10] Minor changes to words, remove errantly added import. --- .../java/org/apache/pirk/schema/data/DataSchemaLoader.java | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/main/java/org/apache/pirk/schema/data/DataSchemaLoader.java b/src/main/java/org/apache/pirk/schema/data/DataSchemaLoader.java index f7c076d7..b1ed00d8 100644 --- a/src/main/java/org/apache/pirk/schema/data/DataSchemaLoader.java +++ b/src/main/java/org/apache/pirk/schema/data/DataSchemaLoader.java @@ -41,7 +41,6 @@ import org.w3c.dom.Node; import org.w3c.dom.NodeList; import org.xml.sax.SAXException; -import scala.tools.nsc.transform.patmat.Logic; /** * Class to load any data schemas specified in the properties file, 'data.schemas' @@ -93,7 +92,7 @@ public class DataSchemaLoader /* Kept for compatibility */ /** * Initializes the static {@link DataSchemaRegistry} with a list of - * available registry names. + * available data schema names. * @throws Exception */ public static void initialize() throws Exception @@ -103,8 +102,8 @@ public static void initialize() throws Exception /* Kept for compatibility */ /** - * Initializes the static {@link org.apache.pirk.schema.data.DataSchemaRegistry} with a list of - * available registry names. 
+ * Initializes the static {@link DataSchemaRegistry} with a list of + * available data schema names. * @param hdfs If true, specifies that the DataSchema is an hdfs file; if false, that it is a regular file. * @param fs Used only when {@paramref hdfs} is true; the {@link org.apache.hadoop.fs.FileSystem} handle for the hdfs in which the DataSchema exists * @throws Exception From f5870b9cb3124733096dcb82d9911eb54de38639 Mon Sep 17 00:00:00 2001 From: Walter Ray-Dulany Date: Mon, 1 Aug 2016 10:38:53 -0500 Subject: [PATCH 03/10] Minor changes to words, eliminate unnecessary complexity in link name --- .../java/org/apache/pirk/schema/data/DataSchemaLoader.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/apache/pirk/schema/data/DataSchemaLoader.java b/src/main/java/org/apache/pirk/schema/data/DataSchemaLoader.java index b1ed00d8..7ed7b08b 100644 --- a/src/main/java/org/apache/pirk/schema/data/DataSchemaLoader.java +++ b/src/main/java/org/apache/pirk/schema/data/DataSchemaLoader.java @@ -104,8 +104,8 @@ public static void initialize() throws Exception /** * Initializes the static {@link DataSchemaRegistry} with a list of * available data schema names. - * @param hdfs If true, specifies that the DataSchema is an hdfs file; if false, that it is a regular file. - * @param fs Used only when {@paramref hdfs} is true; the {@link org.apache.hadoop.fs.FileSystem} handle for the hdfs in which the DataSchema exists + * @param hdfs If true, specifies that the data schema is an hdfs file; if false, that it is a regular file. 
+ * @param fs Used only when {@paramref hdfs} is true; the {@link FileSystem} handle for the hdfs in which the data schema exists * @throws Exception */ public static void initialize(boolean hdfs, FileSystem fs) throws Exception From 956ae54332f6d1ecf2523ebbe977f7ac683386a2 Mon Sep 17 00:00:00 2001 From: Walter Ray-Dulany Date: Mon, 1 Aug 2016 10:45:41 -0500 Subject: [PATCH 04/10] Yet more minor but useful changes to words --- .../java/org/apache/pirk/schema/data/DataSchemaLoader.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/apache/pirk/schema/data/DataSchemaLoader.java b/src/main/java/org/apache/pirk/schema/data/DataSchemaLoader.java index 7ed7b08b..e4942491 100644 --- a/src/main/java/org/apache/pirk/schema/data/DataSchemaLoader.java +++ b/src/main/java/org/apache/pirk/schema/data/DataSchemaLoader.java @@ -195,9 +195,9 @@ public DataSchema loadSchema(InputStream stream) throws IOException, PIRExceptio } /* - * Parses an XML document + * Parses and normalizes the XML document available on the given stream. * @param stream The input stream. - * @return A Document representing the XML document. + * @return A {@link Document} representing the XML document. 
* @throws IOException * @throws PIRException */ From 37406b08197e5937f917fe1ee866e5d0ee7ddb51 Mon Sep 17 00:00:00 2001 From: Walter Ray-Dulany Date: Tue, 2 Aug 2016 08:54:29 -0500 Subject: [PATCH 05/10] Documenting that element names are case sensitive --- src/main/java/org/apache/pirk/schema/data/DataSchemaLoader.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/apache/pirk/schema/data/DataSchemaLoader.java b/src/main/java/org/apache/pirk/schema/data/DataSchemaLoader.java index e4942491..5cea9fda 100644 --- a/src/main/java/org/apache/pirk/schema/data/DataSchemaLoader.java +++ b/src/main/java/org/apache/pirk/schema/data/DataSchemaLoader.java @@ -51,7 +51,7 @@ * * name of the schema * - * element name + * element name; note that element names are case sensitive * class name or type name (if Java primitive type) of the element * whether or not the schema element is an array within the data. * Set to true by including this tag with no text or the string "true" (comparison is case-insensitive). From c6f6549a00fb30aa6fc435a220aa1c2a739a4f8b Mon Sep 17 00:00:00 2001 From: Walter Ray-Dulany Date: Thu, 4 Aug 2016 08:31:01 -0500 Subject: [PATCH 06/10] Now updating comments for QuerySchemeLoader --- .../pirk/schema/query/QuerySchemaLoader.java | 30 +++++++++++++++---- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/src/main/java/org/apache/pirk/schema/query/QuerySchemaLoader.java b/src/main/java/org/apache/pirk/schema/query/QuerySchemaLoader.java index 00d2d0c1..0a693e71 100644 --- a/src/main/java/org/apache/pirk/schema/query/QuerySchemaLoader.java +++ b/src/main/java/org/apache/pirk/schema/query/QuerySchemaLoader.java @@ -50,15 +50,15 @@ * Class to load any query schemas specified in the properties file, 'query.schemas' *

* Schemas should be specified as follows: - * - *

- * {@code
- *  
+ *
+ * 
{@code
+ * 
  *     name of the schema 
  *     name of the data schema over which this query is run 
  *     name of the element in the data schema that will be the selector 
  *    
- *        element name of element in the data schema to include in the query response 
+ *        element name of element in the data schema to include in the query response; just
+ *              as with the data schema, the element name is case sensitive
  *    
  *     (optional) name of the filter class to use to filter the data 
  *     (optional)
@@ -91,12 +91,24 @@ public class QuerySchemaLoader
   }
 
   /* Kept for compatibility */
+  /**
+   * Initializes the static {@link QuerySchemaRegistry} with a list of
+   * available query schema names.
+   * @throws Exception
+   */
   public static void initialize() throws Exception
   {
     initialize(false, null);
   }
 
   /* Kept for compatibility */
+  /**
+   * Initializes the static {@link QuerySchemaRegistry} with a list of
+   * available query schema names.
+   * @param hdfs If true, specifies that the query schema is an hdfs file; if false, that it is a regular file.
+   * @param fs Used only when {@paramref hdfs} is true; the {@link FileSystem} handle for the hdfs in which the query schema exists
+   * @throws Exception
+   */
   public static void initialize(boolean hdfs, FileSystem fs) throws Exception
   {
     String querySchemas = SystemConfiguration.getProperty("query.schemas", "none");
@@ -241,6 +253,10 @@ public QuerySchema loadSchema(InputStream stream) throws IOException, PIRExcepti
 
   /*
    * Parses and normalizes the XML document available on the given stream.
+   * @param stream The input stream.
+   * @return A {@link Document} representing the XML document.
+   * @throws IOException
+   * @throws PIRException
    */
   private Document parseXMLDocument(InputStream stream) throws IOException, PIRException
   {
@@ -261,6 +277,10 @@ private Document parseXMLDocument(InputStream stream) throws IOException, PIRExc
 
   /*
    * Returns the possibly empty set of element names over which the filter is applied, maintaining document order.
+   *
+   * @param doc
+   * @return
+   * @throws PIRException
    */
   private Set extractFilteredElementNames(Document doc) throws PIRException
   {

From 3726f36de429e32cb2372546b9b2712a5aacd3a9 Mon Sep 17 00:00:00 2001
From: Walter Ray-Dulany 
Date: Thu, 4 Aug 2016 10:05:36 -0500
Subject: [PATCH 07/10] A few more comment updates for the DataSchemaLoader

---
 .../pirk/schema/data/DataSchemaLoader.java    | 34 ++++++++++++++-----
 1 file changed, 26 insertions(+), 8 deletions(-)

diff --git a/src/main/java/org/apache/pirk/schema/data/DataSchemaLoader.java b/src/main/java/org/apache/pirk/schema/data/DataSchemaLoader.java
index 5cea9fda..45ca218c 100644
--- a/src/main/java/org/apache/pirk/schema/data/DataSchemaLoader.java
+++ b/src/main/java/org/apache/pirk/schema/data/DataSchemaLoader.java
@@ -104,8 +104,12 @@ public static void initialize() throws Exception
   /**
    * Initializes the static {@link DataSchemaRegistry} with a list of
    * available data schema names.
-   * @param hdfs If true, specifies that the data schema is an hdfs file; if false, that it is a regular file.
-   * @param fs Used only when {@paramref hdfs} is true; the {@link FileSystem} handle for the hdfs in which the data schema exists
+   * @param hdfs
+   *          If true, specifies that the data schema is an hdfs file; if
+   *          false, that it is a regular file.
+   * @param fs
+   *          Used only when {@paramref hdfs} is true; the {@link FileSystem}
+   *          handle for the hdfs in which the data schema exists
    * @throws Exception
    */
   public static void initialize(boolean hdfs, FileSystem fs) throws Exception
@@ -159,9 +163,9 @@ public DataSchemaLoader()
    *          The source of the XML data schema description.
    * @return The data schema.
    * @throws IOException
-   *           A problem occurred reading from the given stream.
+   *          A problem occurred reading from the given stream.
    * @throws PIRException
-   *           The schema description is invalid.
+   *          The schema description is invalid.
    */
   public DataSchema loadSchema(InputStream stream) throws IOException, PIRException
   {
@@ -196,8 +200,10 @@ public DataSchema loadSchema(InputStream stream) throws IOException, PIRExceptio
 
   /*
    * Parses and normalizes the XML document available on the given stream.
-   * @param stream The input stream.
-   * @return A {@link Document} representing the XML document.
+   * @param stream
+   *          The input stream.
+   * @return
+   *          A {@link Document} representing the XML document.
    * @throws IOException
    * @throws PIRException
    */
@@ -220,8 +226,10 @@ private Document parseXMLDocument(InputStream stream) throws IOException, PIRExc
 
   /*
    * Extracts a data schema element node's contents
-   * @param eElement A data schema element node.
-   * @param schema The data schema
+   * @param eElement
+   *          A data schema element node.
+   * @param schema
+   *          The data schema
    * @throws PIRException
    */
   private void extractElementNode(Element eElement, DataSchema schema) throws PIRException
@@ -274,6 +282,10 @@ private void extractElementNode(Element eElement, DataSchema schema) throws PIRE
 
   /*
    * Checks the given type name is a supported Java primitive type, and throws a PIRException if not.
+   *
+   * @param typeName
+   *          The type name to check.
+   * @throws PIRException
    */
   void validateIsPrimitiveType(String typeName) throws PIRException
   {
@@ -287,6 +299,12 @@ void validateIsPrimitiveType(String typeName) throws PIRException
    * Creates a new instance of a class with the given type name.
    * 
    * Throws an exception if the class cannot be instantiated, or it does not implement the required interface.
+   *
+   * @param partitionerTypeName
+   *          The name of the {@link DataPartitioner} subclass to instantiate.
+   * @return
+   *          An instance of the named {@link DataPartitioner} subclass.
+   * @throws PIRException
    */
   DataPartitioner instantiatePartitioner(String partitionerTypeName) throws PIRException
   {

From dcaf4210dfa5ecce5aab6307126b56b5b07bba3d Mon Sep 17 00:00:00 2001
From: Walter Ray-Dulany 
Date: Thu, 4 Aug 2016 14:54:22 -0500
Subject: [PATCH 08/10] Turn all the pre-function comments to javadocs; fix a
 tag.

---
 .../org/apache/pirk/schema/data/DataSchemaLoader.java  | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/main/java/org/apache/pirk/schema/data/DataSchemaLoader.java b/src/main/java/org/apache/pirk/schema/data/DataSchemaLoader.java
index 45ca218c..a6f2cca5 100644
--- a/src/main/java/org/apache/pirk/schema/data/DataSchemaLoader.java
+++ b/src/main/java/org/apache/pirk/schema/data/DataSchemaLoader.java
@@ -108,7 +108,7 @@ public static void initialize() throws Exception
    *          If true, specifies that the data schema is an hdfs file; if
    *          false, that it is a regular file.
    * @param fs
-   *          Used only when {@paramref hdfs} is true; the {@link FileSystem}
+   *          Used only when {@code hdfs} is true; the {@link FileSystem}
    *          handle for the hdfs in which the data schema exists
    * @throws Exception
    */
@@ -198,7 +198,7 @@ public DataSchema loadSchema(InputStream stream) throws IOException, PIRExceptio
     return dataSchema;
   }
 
-  /*
+  /**
    * Parses and normalizes the XML document available on the given stream.
    * @param stream
    *          The input stream.
@@ -224,7 +224,7 @@ private Document parseXMLDocument(InputStream stream) throws IOException, PIRExc
     return doc;
   }
 
-  /*
+  /**
    * Extracts a data schema element node's contents
    * @param eElement
    *          A data schema element node.
@@ -280,7 +280,7 @@ private void extractElementNode(Element eElement, DataSchema schema) throws PIRE
     logger.info("name = " + name + " javaType = " + type + " isArray = " + schema.getArrayElements().contains(name) + " partitioner " + partitionerTypeName);
   }
 
-  /*
+  /**
    * Checks the given type name is a supported Java primitive type, and throws a PIRException if not.
    *
    * @param typeName
@@ -295,7 +295,7 @@ void validateIsPrimitiveType(String typeName) throws PIRException
     }
   }
 
-  /*
+  /**
    * Creates a new instance of a class with the given type name.
    * 
    * Throws an exception if the class cannot be instantiated, or it does not implement the required interface.

From c46ed6042d29510e1e653e5996b9ab2bcf2bce7c Mon Sep 17 00:00:00 2001
From: Walter Ray-Dulany 
Date: Thu, 4 Aug 2016 17:36:51 -0500
Subject: [PATCH 09/10] QuerySchemaLoader: Turn all the pre-function comments
 to javadocs; fix a tag.

---
 .../pirk/schema/query/QuerySchemaLoader.java  | 45 ++++++++++++++-----
 1 file changed, 35 insertions(+), 10 deletions(-)

diff --git a/src/main/java/org/apache/pirk/schema/query/QuerySchemaLoader.java b/src/main/java/org/apache/pirk/schema/query/QuerySchemaLoader.java
index 0a693e71..cc08833d 100644
--- a/src/main/java/org/apache/pirk/schema/query/QuerySchemaLoader.java
+++ b/src/main/java/org/apache/pirk/schema/query/QuerySchemaLoader.java
@@ -105,8 +105,12 @@ public static void initialize() throws Exception
   /**
    * Initializes the static {@link QuerySchemaRegistry} with a list of
    * available query schema names.
-   * @param hdfs If true, specifies that the query schema is an hdfs file; if false, that it is a regular file.
-   * @param fs Used only when {@paramref hdfs} is true; the {@link FileSystem} handle for the hdfs in which the query schema exists
+   * @param hdfs
+   *    If true, specifies that the query schema is an hdfs file; if false,
+   *    that it is a regular file.
+   * @param fs
+   *    Used only when {@code hdfs} is true; the {@link FileSystem} handle
+   *    for the hdfs in which the query schema exists
    * @throws Exception
    */
   public static void initialize(boolean hdfs, FileSystem fs) throws Exception
@@ -161,9 +165,9 @@ public QuerySchemaLoader()
    *          The source of the XML query schema description.
    * @return The query schema.
    * @throws IOException
-   *           A problem occurred reading from the given stream.
+   *          A problem occurred reading from the given stream.
    * @throws PIRException
-   *           The schema description is invalid.
+   *          The schema description is invalid.
    */
   public QuerySchema loadSchema(InputStream stream) throws IOException, PIRException
   {
@@ -251,10 +255,11 @@ public QuerySchema loadSchema(InputStream stream) throws IOException, PIRExcepti
     return querySchema;
   }
 
-  /*
+  /**
    * Parses and normalizes the XML document available on the given stream.
-   * @param stream The input stream.
-   * @return A {@link Document} representing the XML document.
+   * @param stream
+   *          The input stream.
+   * @return A Document representing the XML document.
    * @throws IOException
    * @throws PIRException
    */
@@ -275,11 +280,12 @@ private Document parseXMLDocument(InputStream stream) throws IOException, PIRExc
     return doc;
   }
 
-  /*
+  /**
    * Returns the possibly empty set of element names over which the filter is applied, maintaining document order.
    *
    * @param doc
-   * @return
+   *          An XML document specifying names upon which we will filter the query.
+   * @return The set of names upon which we will filter the query.
    * @throws PIRException
    */
   private Set extractFilteredElementNames(Document doc) throws PIRException
@@ -313,10 +319,17 @@ private Set extractFilteredElementNames(Document doc) throws PIRExceptio
     return filteredNamesSet;
   }
 
-  /*
+  /**
    * Extracts a top level, single value from the XML structure.
    * 
    * Throws an exception if there is not exactly one tag with the given name.
+   *
+   * @param doc
+   *          The XML document from which we extract data
+   * @param tagName
+   *          The name of the tag we wish to extract from the {@code doc}
+   * @return The text content of the tag.
+   * @throws PIRException
    */
   private String extractValue(Document doc, String tagName) throws PIRException
   {
@@ -328,6 +341,18 @@ private String extractValue(Document doc, String tagName) throws PIRException
     return itemList.item(0).getTextContent().trim();
   }
 
+  /**
+   * Instantiate the specified filter.
+   *
+   * Exceptions derive from call to {@link FilterFactory.getFilter}
+   * @param filterTypeName
+   *          The name of the filter class we are instantiating
+   * @param filteredElementNames
+   *          The set of names of elements of the data schema upon which the filter will act.
+   * @return An instantiation of the filter, set up to filter upon the specified names.
+   * @throws IOException
+   * @throws PIRException
+   */
   private DataFilter instantiateFilter(String filterTypeName, Set filteredElementNames) throws IOException, PIRException
   {
     return filterTypeName.equals(NO_FILTER) ? null : FilterFactory.getFilter(filterTypeName, filteredElementNames);

From 08e58c758ae579170239e826fcaa83250718e4c7 Mon Sep 17 00:00:00 2001
From: Walter Ray-Dulany 
Date: Thu, 4 Aug 2016 17:39:42 -0500
Subject: [PATCH 10/10] I wish that I could reference members and methods in a
 javadoc link; alas, no.

---
 .../java/org/apache/pirk/schema/query/QuerySchemaLoader.java    | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/main/java/org/apache/pirk/schema/query/QuerySchemaLoader.java b/src/main/java/org/apache/pirk/schema/query/QuerySchemaLoader.java
index cc08833d..3705c96d 100644
--- a/src/main/java/org/apache/pirk/schema/query/QuerySchemaLoader.java
+++ b/src/main/java/org/apache/pirk/schema/query/QuerySchemaLoader.java
@@ -344,7 +344,7 @@ private String extractValue(Document doc, String tagName) throws PIRException
   /**
    * Instantiate the specified filter.
    *
-   * Exceptions derive from call to {@link FilterFactory.getFilter}
+   * Exceptions derive from call to the {@code getFilter} method of {@link FilterFactory}
    * @param filterTypeName
    *          The name of the filter class we are instantiating
    * @param filteredElementNames