From 15736dfd9edfce3398a023cb12a9c2c86a5c38a5 Mon Sep 17 00:00:00 2001 From: Andrew Chafos Date: Mon, 10 Aug 2020 15:17:25 -0400 Subject: [PATCH] Added 2 new Controllers that perform Daffodil Parse and Unparse using Records, Implemented nearly all CLI options for both existing Processors and new Controllers, and moved all shared properties and methods to new global file for Processors and Controllers to use --- .gitignore | 4 +- README.md | 44 +- nifi-daffodil-nar/pom.xml | 8 +- nifi-daffodil-processors/pom.xml | 116 ++++- .../AbstractDaffodilController.java | 85 ++++ .../nifi/controllers/DaffodilReader.java | 31 ++ .../controllers/DaffodilRecordReader.java | 361 ++++++++++++++ .../controllers/DaffodilRecordSetWriter.java | 293 +++++++++++ .../nifi/controllers/DaffodilWriter.java | 37 ++ .../com/tresys/nifi/infoset/InfosetNode.java | 104 ++++ .../processors/AbstractDaffodilProcessor.java | 315 +++--------- .../tresys/nifi/processors/DaffodilParse.java | 17 +- .../nifi/processors/DaffodilUnparse.java | 18 +- .../DaffodilCompileException.java | 13 +- .../DaffodilProcessingException.java | 13 +- .../tresys/nifi/util/DaffodilResources.java | 374 ++++++++++++++ ...g.apache.nifi.controller.ControllerService | 16 + .../nifi/infoset/InfosetNodeInputter.scala | 105 ++++ .../nifi/infoset/InfosetNodeOutputter.scala | 79 +++ .../tresys/nifi/schema/RecordSchemaNode.scala | 132 +++++ .../com/tresys/nifi/schema/RecordUtil.scala | 113 +++++ .../com/tresys/nifi/schema/RecordWalker.scala | 225 +++++++++ .../controllers/TestDaffodilControllers.java | 470 ++++++++++++++++++ .../processors/TestDaffodilProcessor.java | 268 +++++----- .../nifi/schema/TestDFDLRecordSchema.java | 75 +++ .../choiceWithGroupRef.dfdl.xsd | 61 +++ .../optionalField.dfdl.xsd | 55 ++ .../bitlength.dfdl.xsd | 2 +- .../csv-invalid.dfdl.xsd | 0 .../csv.dfdl.xsd | 0 .../TestDaffodilComponents/digitList.dfdl.xsd | 45 ++ .../leftover.bin | 0 .../nestedChoices.dfdl.xsd | 57 +++ .../noleftover.bin | 0 .../TestDaffodilComponents/testConfig.xml | 5 + .../tokens.csv | 0 .../noleftover.bin.xml | 1 + .../simpleList-extVars.txt.json | 1 + .../simpleList-extraData.txt.json | 1 + .../TestDaffodilControllers/tokens.csv.json | 1 + .../TestDaffodilControllers/tokens.csv.xml | 1 + .../simpleList-extVars.txt.json | 1 + pom.xml | 21 +- 43 files changed, 3138 insertions(+), 430 deletions(-) create mode 100644 nifi-daffodil-processors/src/main/java/com/tresys/nifi/controllers/AbstractDaffodilController.java create mode 100644 nifi-daffodil-processors/src/main/java/com/tresys/nifi/controllers/DaffodilReader.java create mode 100644 nifi-daffodil-processors/src/main/java/com/tresys/nifi/controllers/DaffodilRecordReader.java create mode 100644 nifi-daffodil-processors/src/main/java/com/tresys/nifi/controllers/DaffodilRecordSetWriter.java create mode 100644 nifi-daffodil-processors/src/main/java/com/tresys/nifi/controllers/DaffodilWriter.java create mode 100644 nifi-daffodil-processors/src/main/java/com/tresys/nifi/infoset/InfosetNode.java rename nifi-daffodil-processors/src/main/java/com/tresys/nifi/{processors => util}/DaffodilCompileException.java (64%) rename nifi-daffodil-processors/src/main/java/com/tresys/nifi/{processors => util}/DaffodilProcessingException.java (64%) create mode 100644 nifi-daffodil-processors/src/main/java/com/tresys/nifi/util/DaffodilResources.java create mode 100644 nifi-daffodil-processors/src/main/resources/META-INF/services/org.apache.nifi.controller.ControllerService create mode 100644 
nifi-daffodil-processors/src/main/scala/com/tresys/nifi/infoset/InfosetNodeInputter.scala
 create mode 100644 nifi-daffodil-processors/src/main/scala/com/tresys/nifi/infoset/InfosetNodeOutputter.scala
 create mode 100644 nifi-daffodil-processors/src/main/scala/com/tresys/nifi/schema/RecordSchemaNode.scala
 create mode 100644 nifi-daffodil-processors/src/main/scala/com/tresys/nifi/schema/RecordUtil.scala
 create mode 100644 nifi-daffodil-processors/src/main/scala/com/tresys/nifi/schema/RecordWalker.scala
 create mode 100644 nifi-daffodil-processors/src/test/java/com/tresys/nifi/controllers/TestDaffodilControllers.java
 create mode 100644 nifi-daffodil-processors/src/test/java/com/tresys/nifi/schema/TestDFDLRecordSchema.java
 create mode 100644 nifi-daffodil-processors/src/test/resources/TestDFDLRecordSchema/choiceWithGroupRef.dfdl.xsd
 create mode 100644 nifi-daffodil-processors/src/test/resources/TestDFDLRecordSchema/optionalField.dfdl.xsd
 rename nifi-daffodil-processors/src/test/resources/{TestDaffodilProcessor => TestDaffodilComponents}/bitlength.dfdl.xsd (99%)
 rename nifi-daffodil-processors/src/test/resources/{TestDaffodilProcessor => TestDaffodilComponents}/csv-invalid.dfdl.xsd (100%)
 rename nifi-daffodil-processors/src/test/resources/{TestDaffodilProcessor => TestDaffodilComponents}/csv.dfdl.xsd (100%)
 create mode 100644 nifi-daffodil-processors/src/test/resources/TestDaffodilComponents/digitList.dfdl.xsd
 rename nifi-daffodil-processors/src/test/resources/{TestDaffodilProcessor => TestDaffodilComponents}/leftover.bin (100%)
 create mode 100644 nifi-daffodil-processors/src/test/resources/TestDaffodilComponents/nestedChoices.dfdl.xsd
 rename nifi-daffodil-processors/src/test/resources/{TestDaffodilProcessor => TestDaffodilComponents}/noleftover.bin (100%)
 create mode 100644 nifi-daffodil-processors/src/test/resources/TestDaffodilComponents/testConfig.xml
 rename nifi-daffodil-processors/src/test/resources/{TestDaffodilProcessor => TestDaffodilComponents}/tokens.csv (100%)
 create mode 100644 nifi-daffodil-processors/src/test/resources/TestDaffodilControllers/noleftover.bin.xml
 create mode 100644 nifi-daffodil-processors/src/test/resources/TestDaffodilControllers/simpleList-extVars.txt.json
 create mode 100644 nifi-daffodil-processors/src/test/resources/TestDaffodilControllers/simpleList-extraData.txt.json
 create mode 100644 nifi-daffodil-processors/src/test/resources/TestDaffodilControllers/tokens.csv.json
 create mode 100644 nifi-daffodil-processors/src/test/resources/TestDaffodilControllers/tokens.csv.xml
 create mode 100644 nifi-daffodil-processors/src/test/resources/TestDaffodilProcessor/simpleList-extVars.txt.json
diff --git a/.gitignore b/.gitignore
index eb5a316..d953095 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,3 @@
-target
+target/*
+nifi-daffodil-nar/target/
+nifi-daffodil-processors/target/
\ No newline at end of file
diff --git a/README.md b/README.md
index 8ebc03d..4990cfc 100644
--- a/README.md
+++ b/README.md
@@ -6,14 +6,50 @@
 This repository contains the source for two NiFi processors which use [Apache Daffodil (incubating)](https://daffodil.apache.org), an open source implementation of the [Data Format Description Language (DFDL)](https://www.ogf.org/ogf/doku.php/standards/dfdl/dfdl) to parse/unparse
-data to/from an XML infoset. The two processor included are:
+data to/from NiFi Records, which are then transformed into an Infoset by the supplied NiFi Controller.
+The two processors included are:
 
-* DaffodilParse: Reads a FlowFile and parses the data into an XML infoset
-* DaffodilUnparse: Reads a FlowFile, in the form of an XML infoset, and
-  unparses the infoset to the original file format
+* DaffodilParse: Reads a FlowFile and parses the data into a NiFi Record,
+which is then converted into an Infoset by a NiFi RecordSetWriter component.
+* DaffodilUnparse: Reads a FlowFile containing an infoset, converts it into Records using the appropriate NiFi RecordReader
+component, and then unparses these Records to the original file format.
+
+## Processor Properties
+
+Each Processor has a number of configurable properties intended to be analogous
+to the [CLI options](https://daffodil.apache.org/cli/) for the Daffodil tool.
+Here is a note about the __Stream__ option:
+
+- __Stream Mode:__ This mode is disabled by default. When enabled, leftover data after a parse is no longer routed to failure;
+instead, the parse is repeated on the remaining data, generating one Record per parse.
+If all of the parses succeed, a Set of Records is generated.
+When using this mode with the XML Reader and Writer components, the Writer component must be configured with a
+name for the Root Tag, and the Reader component must be configured with "Expect Records as Array" set to true.
+
+And here is a note about __Tunables__ and __External Variables__:
+
+- To add External Variables to the Processor, simply add custom key/value pairs as custom Properties when
+configuring the Processor. To add Tunables, do the same thing but add a "+" character in front of the name
+of the tunable variable; e.g. +maxOccursCount would be the key and something like 10 would be the value.
+
+## Note about the Controllers
+
+Currently, when using the DaffodilReader and DaffodilWriter Controllers in this project, unparsing from XML is not fully supported because the NiFi XMLReader Controller ignores empty XML elements. Unparsing from XML is only supported for XML infosets that do not contain any empty XML elements. However, unparsing via JSON is fully supported.
 
 ## Build Instructions
 
+**This is a specific step for the development branch**:
+
+Because this project depends on a snapshot version of Daffodil, in order to run `mvn install` you
+must first clone the latest version of [Apache Daffodil](https://github.com/apache/incubator-daffodil)
+and run
+
+    sbt publishM2
+
+This step will not be necessary once Daffodil 3.0.0 is released.
+
+Then, the following should work as expected:
+
 This repository uses the maven build environment. To create a nar file for use in Apache NiFi, run
diff --git a/nifi-daffodil-nar/pom.xml b/nifi-daffodil-nar/pom.xml
index 1612d55..207dbc1 100644
--- a/nifi-daffodil-nar/pom.xml
+++ b/nifi-daffodil-nar/pom.xml
@@ -13,17 +13,17 @@
 See the License for the specific language governing permissions and limitations under the License. -->
-
+
 4.0.0 com.tresys nifi-daffodil
- 1.5
+ 2.0
 nifi-daffodil-nar
- 1.5
+ 2.0
 nar true
@@ -34,7 +34,7 @@
 com.tresys nifi-daffodil-processors
- 1.5
+ 2.0
diff --git a/nifi-daffodil-processors/pom.xml b/nifi-daffodil-processors/pom.xml
index 42c4868..3a09e76 100644
--- a/nifi-daffodil-processors/pom.xml
+++ b/nifi-daffodil-processors/pom.xml
@@ -13,53 +13,145 @@
 See the License for the specific language governing permissions and limitations under the License.
--> - + 4.0.0 com.tresys nifi-daffodil - 1.5 + 2.0 nifi-daffodil-processors jar + + + + jitpack.io + https://jitpack.io + + + + + + com.github.apache + incubator-daffodil + 92d2036e3d + + org.apache.nifi nifi-api + ${nifi.version} + + + + org.apache.nifi + nifi-record + ${nifi.version} + org.apache.nifi - nifi-utils + nifi-record-serialization-service-api + ${nifi.version} + compile + + + org.apache.nifi + nifi-record-serialization-services-nar + ${nifi.version} + nar + + + + + org.apache.nifi + nifi-processor-utils + ${nifi.version} + + + com.google.guava guava 18.0 + - org.apache.daffodil - daffodil-japi_2.12 - 2.5.0 + org.apache.nifi + nifi-mock + ${nifi.version} + test - + org.apache.nifi - nifi-mock + nifi-standard-processors + ${nifi.version} test + - org.slf4j - slf4j-simple + org.apache.nifi + nifi-record-serialization-services + ${nifi.version} test + - junit - junit + org.apache.nifi + nifi-schema-registry-service-api + ${nifi.version} test + + + + + + net.alchim31.maven + scala-maven-plugin + 4.4.0 + + + org.apache.maven.plugins + maven-compiler-plugin + 3.8.1 + + + + + + net.alchim31.maven + scala-maven-plugin + 4.4.0 + + + scala-compile-first + process-resources + + add-source + compile + + + + scala-test-compile + process-test-resources + + testCompile + + + + + + diff --git a/nifi-daffodil-processors/src/main/java/com/tresys/nifi/controllers/AbstractDaffodilController.java b/nifi-daffodil-processors/src/main/java/com/tresys/nifi/controllers/AbstractDaffodilController.java new file mode 100644 index 0000000..70af796 --- /dev/null +++ b/nifi-daffodil-processors/src/main/java/com/tresys/nifi/controllers/AbstractDaffodilController.java @@ -0,0 +1,85 @@ +package com.tresys.nifi.controllers; + +import com.tresys.nifi.util.DaffodilCompileException; +import com.tresys.nifi.util.DaffodilResources; +import static com.tresys.nifi.util.DaffodilResources.DataProcessorSchemaPair; +import org.apache.nifi.annotation.lifecycle.OnEnabled; +import org.apache.nifi.components.AllowableValue; +import org.apache.nifi.components.PropertyDescriptor; +import org.apache.nifi.controller.AbstractControllerService; +import org.apache.nifi.controller.ConfigurationContext; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Map; + +public abstract class AbstractDaffodilController extends AbstractControllerService { + + private volatile ConfigurationContext context; + protected volatile StreamMode streamMode; + + @OnEnabled + public void storeConfiguration(final ConfigurationContext context) { + this.streamMode = StreamMode.valueOf(context.getProperty(STREAM_MODE).getValue()); + this.context = context; + } + + protected DataProcessorSchemaPair getPair(Map attributes) throws DaffodilCompileException { + if (context == null) { + return null; + } else { + String dfdlFile = context.getProperty(DaffodilResources.DFDL_SCHEMA_FILE).evaluateAttributeExpressions(attributes).getValue(); + return DaffodilResources.getDataProcessorSchemaPair(getLogger(), context, dfdlFile); + } + } + + public enum StreamMode { + OFF, ALL_SUCCESSFUL, ONLY_SUCCESSFUL + } + + public static final AllowableValue STREAM_MODE_OFF + = new AllowableValue(StreamMode.OFF.name(), StreamMode.OFF.name(), "Stream Mode is off."); + public static final AllowableValue STREAM_MODE_ALL_SUCCESSFUL + = new AllowableValue(StreamMode.ALL_SUCCESSFUL.name(), StreamMode.ALL_SUCCESSFUL.name(), + "Multiple records are parsed until there is no remaining data. 
If there is a failure, stop and discard all records.");
+    public static final AllowableValue STREAM_MODE_ONLY_SUCCESSFUL
+            = new AllowableValue(StreamMode.ONLY_SUCCESSFUL.name(), StreamMode.ONLY_SUCCESSFUL.name(),
+            "Multiple records are parsed until there is no remaining data. If there is a failure, stop, and keep all successful records.");
+
+    /**
+     * If this Property is set to a streaming mode, then multiple Records will be produced when there is leftover data, with each one beginning where
+     * the last one left off. Normally, leftover data simply fails the parse. In 'All Successful' mode we still route to failure if *any* of these
+     * Records is not successfully produced. Enabling streaming does not cause any issues for unparsing, as the unparse Record component is
+     * a RecordSetWriterFactory, which is able to handle data containing a set of Records rather than just one Record.
+     */
+    static final PropertyDescriptor STREAM_MODE = new PropertyDescriptor.Builder()
+        .name("stream-mode")
+        .displayName("Stream Mode")
+        .description("Rather than throwing an error when leftover data exists after a parse, one can repeat the parse with the remaining data. "
+            + "With the 'All Successful' Mode, an error is thrown if any of the parses fail, whereas with 'Only Successful', the parse will succeed,"
+            + " and only successful parses show up in the output.")
+        .required(false)
+        .defaultValue(STREAM_MODE_OFF.getValue())
+        .allowableValues(STREAM_MODE_OFF, STREAM_MODE_ALL_SUCCESSFUL, STREAM_MODE_ONLY_SUCCESSFUL)
+        .build();
+
+    private static final List<PropertyDescriptor> controllerProperties;
+
+    static {
+        List<PropertyDescriptor> properties = new ArrayList<>(DaffodilResources.daffodilProperties);
+        properties.add(STREAM_MODE);
+        controllerProperties = Collections.unmodifiableList(properties);
+    }
+
+    @Override
+    protected List<PropertyDescriptor> getSupportedPropertyDescriptors() {
+        return controllerProperties;
+    }
+
+    @Override
+    protected PropertyDescriptor getSupportedDynamicPropertyDescriptor(final String propertyDescriptorName) {
+        return DaffodilResources.getSupportedDynamicPropertyDescriptor(propertyDescriptorName);
+    }
+
+}
\ No newline at end of file
diff --git a/nifi-daffodil-processors/src/main/java/com/tresys/nifi/controllers/DaffodilReader.java b/nifi-daffodil-processors/src/main/java/com/tresys/nifi/controllers/DaffodilReader.java
new file mode 100644
index 0000000..9167eb5
--- /dev/null
+++ b/nifi-daffodil-processors/src/main/java/com/tresys/nifi/controllers/DaffodilReader.java
@@ -0,0 +1,31 @@
+package com.tresys.nifi.controllers;
+
+import com.tresys.nifi.util.DaffodilResources.DataProcessorSchemaPair;
+import org.apache.nifi.annotation.documentation.CapabilityDescription;
+import org.apache.nifi.annotation.documentation.Tags;
+import org.apache.nifi.logging.ComponentLog;
+import org.apache.nifi.processor.exception.ProcessException;
+import org.apache.nifi.serialization.RecordReader;
+import org.apache.nifi.serialization.RecordReaderFactory;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Map;
+
+@Tags({"xml", "json", "daffodil", "dfdl", "schema", "xsd"})
+@CapabilityDescription("Use Daffodil and a user-specified DFDL schema to parse data into Records")
+public class DaffodilReader extends AbstractDaffodilController implements RecordReaderFactory {
+
+    @Override
+    public RecordReader createRecordReader(Map<String, String> variables, InputStream inputStream,
+                                           long inputLength, ComponentLog logger) {
+        try {
+            DataProcessorSchemaPair pair = getPair(variables);
+            return new DaffodilRecordReader(pair.getSchema(), inputStream,
pair.getDataProcessor(), streamMode, logger); + } catch (IOException ioe) { + getLogger().error("Unable to obtain DataProcessor and/or Schema due to {}", new Object[]{ioe.getMessage()}); + throw new ProcessException("Unable to obtain DataProcessor and/or Schema", ioe); + } + } + +} diff --git a/nifi-daffodil-processors/src/main/java/com/tresys/nifi/controllers/DaffodilRecordReader.java b/nifi-daffodil-processors/src/main/java/com/tresys/nifi/controllers/DaffodilRecordReader.java new file mode 100644 index 0000000..a193769 --- /dev/null +++ b/nifi-daffodil-processors/src/main/java/com/tresys/nifi/controllers/DaffodilRecordReader.java @@ -0,0 +1,361 @@ +/* + * Copyright 2020 Nteligen, LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.tresys.nifi.controllers; + +import com.tresys.nifi.controllers.AbstractDaffodilController.StreamMode; +import com.tresys.nifi.infoset.InfosetNode; +import com.tresys.nifi.infoset.InfosetNodeOutputter; +import com.tresys.nifi.schema.OptionalRecordField; +import com.tresys.nifi.util.DaffodilProcessingException; +import org.apache.daffodil.japi.DataLocation; +import org.apache.daffodil.japi.DataProcessor; +import org.apache.daffodil.japi.ParseResult; +import org.apache.daffodil.japi.io.InputSourceDataInputStream; +import org.apache.nifi.logging.ComponentLog; +import org.apache.nifi.serialization.RecordReader; +import org.apache.nifi.serialization.SimpleRecordSchema; +import org.apache.nifi.serialization.record.DataType; +import org.apache.nifi.serialization.record.MapRecord; +import org.apache.nifi.serialization.record.Record; +import org.apache.nifi.serialization.record.RecordField; +import org.apache.nifi.serialization.record.RecordFieldType; +import org.apache.nifi.serialization.record.RecordSchema; +import org.apache.nifi.serialization.record.type.ArrayDataType; +import org.apache.nifi.serialization.record.type.ChoiceDataType; +import org.apache.nifi.serialization.record.type.RecordDataType; + +import java.io.IOException; +import java.io.InputStream; +import java.math.BigInteger; +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; + +public class DaffodilRecordReader implements RecordReader { + + private final DataProcessor dataProcessor; + private final RecordSchema originalSchema; + private RecordSchema currentSchema; + private final InputStream inputStream; + private final InputSourceDataInputStream dataInputStream; + private long totalBits, bitsRead; + private boolean coerceTypes; + private final ComponentLog logger; + private final StreamMode streamMode; + + public DaffodilRecordReader(RecordSchema schema, InputStream inputStream, DataProcessor dataProcessor, StreamMode streamMode, ComponentLog logger) { + this.originalSchema = schema; + this.coerceTypes = false; + this.inputStream = inputStream; + this.dataInputStream = new InputSourceDataInputStream(inputStream); + this.totalBits = -1; + this.bitsRead = 0; + this.dataProcessor = 
dataProcessor;
+        this.logger = logger;
+        this.currentSchema = originalSchema;
+        this.streamMode = streamMode;
+    }
+
+    @Override
+    public Record nextRecord(boolean shouldCoerceTypes, boolean ignored) throws IOException {
+        this.coerceTypes = shouldCoerceTypes;
+        if (inputStream.available() > 0) {
+            // multiply as a long so that large inputs do not overflow an int
+            this.totalBits = inputStream.available() * 8L;
+        }
+        if (inputStream.available() > 0 || (streamMode != StreamMode.OFF && bitsRead < totalBits)) {
+            try {
+                InfosetNode rootNode = parseToInfosetNode();
+                Record result = complexNodeToRecord(originalSchema, rootNode);
+                currentSchema = result.getSchema();
+                return result;
+            } catch (DaffodilProcessingException possiblyIgnored) {
+                if (streamMode != StreamMode.ONLY_SUCCESSFUL) {
+                    logger.error("Error Performing Daffodil Parse: {}", new Object[]{possiblyIgnored});
+                    throw possiblyIgnored;
+                }
+                return null;
+            }
+        } else {
+            return null;
+        }
+    }
+
+    /**
+     * Perform a parse using the given DataProcessor to an Outputter that outputs
+     * the Infoset data in the form of an InfosetNode tree
+     * @return the InfosetNode result of the parse
+     */
+    private InfosetNode parseToInfosetNode() throws DaffodilProcessingException {
+        InfosetNodeOutputter outputter = new InfosetNodeOutputter();
+        ParseResult result = dataProcessor.parse(dataInputStream, outputter);
+        if (result.isError()) {
+            throw new DaffodilProcessingException("Failed to parse: " + result.getDiagnostics());
+        }
+        if (!outputter.getResult().isDefined()) {
+            throw new DaffodilProcessingException("NodeOutputter did not successfully parse infoset!");
+        }
+        // Currently, if not all bytes are consumed and the Processor is not in stream mode, we
+        // throw an Exception and, as a result, route to failure
+        DataLocation loc = result.location();
+        if (loc.bitPos1b() - 1 <= bitsRead) {
+            throw new DaffodilProcessingException(
+                String.format(
+                    "No data consumed! The current parse started and ended with %s bit(s)"
+                    + " having been read when trying to parse %s",
+                    bitsRead, outputter.getResult().get()
+                )
+            );
+        } else {
+            bitsRead = loc.bitPos1b() - 1;
+        }
+        if (streamMode == StreamMode.OFF && totalBits != bitsRead) {
+            throw new DaffodilProcessingException(
+                String.format(
+                    "Left over data. Consumed %s bit(s) with %s bit(s) remaining when parsing %s",
+                    bitsRead, totalBits - bitsRead, outputter.getResult().get()
+                )
+            );
+        }
+        return outputter.getResult().get();
+    }
+
+    /**
+     * Converts a given InfosetNode into a Record based on processed values returned by getRecordValue
+     * @param schema the sub-RecordSchema corresponding to this Record
+     * @param parentNode the InfosetNode with data corresponding to this Record
+     * @return a NiFi Record with a possibly modified Schema correctly filled with the data from the Node
+     */
+    private Record complexNodeToRecord(RecordSchema schema, InfosetNode parentNode) throws DaffodilProcessingException {
+        Map<String, Object> recordMap = new LinkedHashMap<>();
+        List<RecordField> modifiedFieldList = new ArrayList<>();
+        for (RecordField field: schema.getFields()) {
+            // If an Optional Field does not occur in the data, it will throw an Exception at some point.
+            // So, we only propagate Exceptions from fields here if they come from required fields.
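+            // For example (illustrative): an absent optional element simply contributes no entry to
+            // recordMap below, while an absent required element surfaces here as a DaffodilProcessingException.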
+            try {
+                List<FieldValuePair> fieldValuePairs
+                    = getRecordValue(field.getFieldName(), field.getDataType(), parentNode);
+                for (FieldValuePair pair: fieldValuePairs) {
+                    modifiedFieldList.add(pair.toRecordField());
+                    recordMap.put(pair.name, pair.value);
+                }
+            } catch (DaffodilProcessingException possiblyIgnored) {
+                if (!(field instanceof OptionalRecordField)) {
+                    throw possiblyIgnored;
+                }
+            }
+        }
+        SimpleRecordSchema newSchema = new SimpleRecordSchema(modifiedFieldList);
+        /* The XML Writer will need some sort of name for the Root tag encompassing all the Infoset data.
+         * If we set the schema name, then it will just use it as the Root tag name instead of having
+         * to be configured with an explicit one
+         */
+        schema.getSchemaName().ifPresent(newSchema::setSchemaName);
+        return new MapRecord(newSchema, recordMap);
+    }
+
+    /**
+     * Temporary Wrapper class representing a field-value pair.
+     * Used here because anonymous choices may be transformed into multiple fields
+     */
+    private static class FieldValuePair {
+        public final String name;
+        public final DataType type;
+        public final Object value;
+
+        public FieldValuePair(String name, DataType type, Object value) {
+            this.name = name;
+            this.type = type;
+            this.value = value;
+        }
+
+        public RecordField toRecordField() {
+            return new RecordField(name, type);
+        }
+    }
+
+    /**
+     * Searches for the given child based on `fieldName` in the child list of parentNode, and then
+     * proceeds to convert that Node into a temporary list of FieldValuePairs that will later be converted
+     * into RecordFields and have the data extracted from them.
+     * The reason why we don't simply pass the child InfosetNode to this method is that locating the child
+     * is particularly complicated for anonymous choices; the child that was selected by the choice isn't
+     * known until we invoke choiceToPairList
+     * @param fieldName the name of the child Node/field we are searching for
+     * @param dataType the type of the current field
+     * @param parentNode the parent of the list that the current child Node is in
+     * @return a list of Field-Value Pairs to insert into the Record containing this field
+     */
+    private List<FieldValuePair> getRecordValue(String fieldName, DataType dataType,
+                                                InfosetNode parentNode) throws DaffodilProcessingException {
+        List<FieldValuePair> fieldValuePairs = new ArrayList<>();
+        Optional<InfosetNode> optChild = parentNode.getChild(fieldName);
+        if (fieldName == null) {
+            // when no field name is given, we treat the "parentNode" like it's a child node, and dataType matches its
+            // type instead of the "child"'s type
+            optChild = Optional.of(parentNode);
+        }
+        if (!(dataType instanceof ChoiceDataType) && !optChild.isPresent()) {
+            throw new DaffodilProcessingException(
+                String.format(
+                    "Required Schema field %s was not present in child list %s", fieldName, parentNode.childrenToString()
+                )
+            );
+        }
+        if (dataType instanceof RecordDataType) {
+            RecordDataType recordDataType = (RecordDataType) dataType;
+            Record subRecord = complexNodeToRecord(recordDataType.getChildSchema(), optChild.get());
+            dataType = RecordFieldType.RECORD.getRecordDataType(subRecord.getSchema());
+            fieldValuePairs.add(new FieldValuePair(fieldName, dataType, subRecord));
+        } else if (dataType instanceof ChoiceDataType) {
+            ChoiceDataType choiceDataType = (ChoiceDataType) dataType;
+            List<DataType> possibleTypes = choiceDataType.getPossibleSubTypes();
+            return choiceToPairList(possibleTypes, parentNode);
+        } else if (dataType instanceof ArrayDataType) {
+            DataType arrayMemberType = ((ArrayDataType) dataType).getElementType();
+            fieldValuePairs.add(arrayToPair(fieldName,
arrayMemberType, optChild.get())); + } else { + Object simpleValue; + if (coerceTypes) { + simpleValue = coerceSimpleType(dataType.getFieldType(), optChild.get().getValue()); + } else { + simpleValue = optChild.get().getValue(); + } + fieldValuePairs.add(new FieldValuePair(fieldName, dataType, simpleValue)); + } + return fieldValuePairs; + } + + /** + * Given a parent InfosetNode, iterates through the parent's child list and tries to process one + * that matches one of the given sub-types. + * There are potentially multiple Pairs returned here because a Choice option may be an element + * with multiple fields; note the plural nature of the "allFields" variable. + * @param possibleSubTypes the possible DataTypes this Choice could be. An error is thrown if these + * are not all RecordDataTypes + * @param parentNode the parent InfosetNode whose child list should contain the Choice option to be selected + * @return a list of Pairs representing the Fields & Values that were selected for the Choice + * @throws DaffodilProcessingException if one of the possibleSubTypes is not a RecordDataType, or if none + * of the choice options are successfully selected. + */ + private List choiceToPairList(List possibleSubTypes, + InfosetNode parentNode) throws DaffodilProcessingException { + List fieldValPairs = new ArrayList<>(); + for (DataType possibleType: possibleSubTypes) { + if (!(possibleType instanceof RecordDataType)) { + throw new DaffodilProcessingException("Possible Type of Choice element was not a record!"); + } else { + RecordDataType possRecordType = (RecordDataType) possibleType; + // RecordFields belonging to the RecordSchema for the current Choice Option + List allFields = possRecordType.getChildSchema().getFields(); + List fieldsFound = new ArrayList<>(); + boolean allFound = true; + + for (RecordField field: allFields) { + if (!parentNode.getChild(field.getFieldName()).isPresent()) { + if (!(field instanceof OptionalRecordField)) { + allFound = false; + } + } else { + fieldsFound.add(field); + } + } + if (allFound) { + for (RecordField field: fieldsFound) { + fieldValPairs.addAll(getRecordValue(field.getFieldName(), field.getDataType(), parentNode)); + } + } + } + } + if (fieldValPairs.isEmpty()) { + throw new DaffodilProcessingException( + String.format("InfosetNode Child List %s did not match any choice option of choice %s", + possibleSubTypes.toString(), parentNode.toString() + ) + ); + } + return fieldValPairs; + } + + /** + * Given a parent Array Node, create a Pair that has the name of the Array and the dataType is + * an Array of Choices; the Choice being of every single possible type in the Array. This is necessary + * because anonymous choices change the RecordDataType manifested originally, so an Array that has an + * anonymous choice as some sub-element would need to know this. 
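+     * For example (hypothetical illustration): an array whose members resolve to two different Record shapes
+     * ends up typed as an Array of a Choice of those two Record types, not as an Array of the original member type.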
+ * @param originalFieldName the name of this Array + * @param childType the original element type of the Array; note that this is not necessarily the same + * as the element type of the final Array, as the final dataType is obtained via pair.type + * @param arrayNode the InfosetNode representing the current Array + * @return a FieldValuePair as described above + */ + private FieldValuePair arrayToPair(String originalFieldName, DataType childType, + InfosetNode arrayNode) throws DaffodilProcessingException { + List valueArrList = new ArrayList<>(); + List possibleSubTypes = new ArrayList<>(); + for (InfosetNode arrayMember : arrayNode) { + for (FieldValuePair pair : getRecordValue(null, childType, arrayMember)) { + valueArrList.add(pair.value); + possibleSubTypes.add(pair.type); + } + } + DataType choiceType = RecordFieldType.CHOICE.getChoiceDataType(possibleSubTypes); + return new FieldValuePair( + originalFieldName, RecordFieldType.ARRAY.getArrayDataType(choiceType), valueArrList.toArray() + ); + } + + /** + * Attempts to parse a String into another simple type + * @param type the type that the String is attempting to parse into + * @param simpleTypeValue the String to be parsed + * @return the parsed version of the String, if it succeeds + * @throws DaffodilProcessingException if type is not one of the known types or the parse fails + */ + private Object coerceSimpleType(RecordFieldType type, String simpleTypeValue) throws DaffodilProcessingException { + try { + switch (type) { + case STRING: + return simpleTypeValue; + case BYTE: + return Byte.parseByte(simpleTypeValue); + case SHORT: + return Short.parseShort(simpleTypeValue); + case INT: + return Integer.parseInt(simpleTypeValue); + case LONG: + return Long.parseLong(simpleTypeValue); + case BIGINT: + return new BigInteger(simpleTypeValue); + default: + throw new DaffodilProcessingException("Attempted coercing unsupported type " + type); + } + } catch (NumberFormatException nfe) { + throw new DaffodilProcessingException(String.format("Could not cast %s to a %s", simpleTypeValue, type.name())); + } + } + + @Override + public RecordSchema getSchema() { + return currentSchema; + } + + @Override + public void close() throws IOException { + inputStream.close(); + } +} diff --git a/nifi-daffodil-processors/src/main/java/com/tresys/nifi/controllers/DaffodilRecordSetWriter.java b/nifi-daffodil-processors/src/main/java/com/tresys/nifi/controllers/DaffodilRecordSetWriter.java new file mode 100644 index 0000000..d130215 --- /dev/null +++ b/nifi-daffodil-processors/src/main/java/com/tresys/nifi/controllers/DaffodilRecordSetWriter.java @@ -0,0 +1,293 @@ +/* + * Copyright 2020 Nteligen, LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.tresys.nifi.controllers; + +import com.tresys.nifi.controllers.AbstractDaffodilController.StreamMode; +import com.tresys.nifi.infoset.InfosetNode; +import com.tresys.nifi.infoset.InfosetNodeInputter; +import com.tresys.nifi.util.DaffodilProcessingException; +import com.tresys.nifi.schema.OptionalRecordField; +import com.tresys.nifi.schema.RecordUtil; +import org.apache.daffodil.japi.UnparseResult; +import org.apache.daffodil.japi.DataProcessor; +import org.apache.nifi.logging.ComponentLog; +import org.apache.nifi.serialization.RecordSetWriter; +import org.apache.nifi.serialization.WriteResult; +import org.apache.nifi.serialization.record.DataType; +import org.apache.nifi.serialization.record.Record; +import org.apache.nifi.serialization.record.RecordField; +import org.apache.nifi.serialization.record.RecordSchema; +import org.apache.nifi.serialization.record.RecordSet; +import org.apache.nifi.serialization.record.type.ArrayDataType; +import org.apache.nifi.serialization.record.type.ChoiceDataType; +import org.apache.nifi.serialization.record.type.RecordDataType; + +import java.io.IOException; +import java.io.OutputStream; +import java.nio.channels.Channels; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; + +public class DaffodilRecordSetWriter implements RecordSetWriter { + + private final ComponentLog logger; + private final DataProcessor dataProcessor; + private final OutputStream outputStream; + private final RecordSchema originalSchema; + private WriteResult result; + private final StreamMode streamMode; + + public DaffodilRecordSetWriter(DataProcessor dataProcessor, OutputStream outputStream, + RecordSchema originalSchema, StreamMode streamMode, ComponentLog logger) { + this.logger = logger; + this.dataProcessor = dataProcessor; + this.outputStream = outputStream; + this.originalSchema = originalSchema; + this.result = WriteResult.EMPTY; + this.streamMode = streamMode; + } + + /** + * Note that nothing involving a RecordSet is supported in this class; only individual Records. + * However, this class IS able to support multiple Records as input; these simply are processed by multiple + * calls to write(Record) - you can see this by how the RecordSetWriter is used in AbstractDaffodilProcessor's + * onTrigger implementation + */ + + @Override + public WriteResult write(RecordSet recordSet) { + throw new UnsupportedOperationException( + "This method is not supported! 
Please use a Record, not a RecordSet, as a parameter to write()"
+        );
+    }
+
+    @Override
+    public void beginRecordSet() { }
+
+    @Override
+    public WriteResult finishRecordSet() throws DaffodilProcessingException {
+        if (result == null) {
+            throw new DaffodilProcessingException("Called finishRecordSet() but no valid result was obtained!");
+        } else {
+            return result;
+        }
+    }
+
+    @Override
+    public WriteResult write(Record record) throws DaffodilProcessingException {
+        if (streamMode != StreamMode.OFF || result.getRecordCount() == 0) {
+            try {
+                InfosetNodeInputter infosetNodeInputter
+                    = new InfosetNodeInputter(recordToInfosetNode(originalSchema, record.toMap(), logger), logger);
+                UnparseResult unparseResult = dataProcessor.unparse(infosetNodeInputter, Channels.newChannel(outputStream));
+                if (unparseResult.isError()) {
+                    throw new DaffodilProcessingException(
+                        String.format(
+                            "Could not unparse input record %s due to %s", RecordUtil.printRecord(record, ""),
+                            unparseResult.getDiagnostics()
+                        )
+                    );
+                } else {
+                    this.result = WriteResult.of(this.result.getRecordCount() + 1, Collections.emptyMap());
+                }
+            } catch (DaffodilProcessingException possiblyIgnored) {
+                if (streamMode != StreamMode.ONLY_SUCCESSFUL) {
+                    throw possiblyIgnored;
+                }
+            }
+        }
+        return this.result;
+    }
+
+    /**
+     * Converts a given Record into a Tree of InfosetNodes. We only pass around the values map to helper methods,
+     * instead of the original Record object directly, because we traverse the Record based on the known Schema
+     * obtained from the DSOM API, *not* whatever Schema was provided with the Record.
+     * @throws DaffodilProcessingException if a required field is not found in the values Map
+     */
+    private static InfosetNode recordToInfosetNode(RecordSchema schema, Map<String, Object> values,
+                                                   ComponentLog logger) throws DaffodilProcessingException {
+        InfosetNode recordNode = new InfosetNode("", false);
+        for (RecordField field: schema.getFields()) {
+            try {
+                List<InfosetNode> nodeResult = recordValueToNode(field.getFieldName(), field.getDataType(), values, logger);
+                nodeResult.forEach(recordNode::addChild);
+            } catch (DaffodilProcessingException possiblyIgnored) {
+                if (!(field instanceof OptionalRecordField)) {
+                    throw possiblyIgnored;
+                }
+            }
+        }
+        return recordNode;
+    }
+
+    /**
+     * Given a field name, type, and map with values, returns one or more InfosetNodes representing
+     * the data that corresponds to the field name that is found in the map. Processing this data
+     * is heavily informed by the passed in dataType.
Multiple InfosetNodes may be returned due to the fact + * that processed Choices may refer to multiple extracted Field-Value pairs + * @param fieldName the name of the field to be selected from values + * @param dataType the dataType of the current field + * @param values a Map containing the values from the original Record + * @param logger a log component for debugging + * @return a List of InfosetNodes containing the processed data from converting the given Record + * @throws DaffodilProcessingException if fieldName cannot be found in the provided map and the dataType is not + * a Choice + */ + private static List recordValueToNode(String fieldName, DataType dataType, Map values, + ComponentLog logger) throws DaffodilProcessingException { + Optional optValue = Optional.ofNullable(values.get(fieldName)); + if (!(dataType instanceof ChoiceDataType) && !optValue.isPresent()) { + throw new DaffodilProcessingException( + String.format( + "Required Schema field %s was not present in map %s", fieldName, values + ) + ); + } + List nodesToReturn = new ArrayList<>(); + if (dataType instanceof RecordDataType) { + RecordDataType recordDataType = (RecordDataType) dataType; + Record subRecord; + if (optValue.get() instanceof Record) { + subRecord = (Record) optValue.get(); + } else { + throw new DaffodilProcessingException( + String.format("Expected a Record value, but instead got invalid value %s", optValue.get().toString()) + ); + } + // recordToInfosetNode returns an InfosetNode with an empty name and a list of child Nodes. To make + // it fit here, all we have to do is change its name to fieldName. + InfosetNode recordNode = recordToInfosetNode(recordDataType.getChildSchema(), subRecord.toMap(), logger); + recordNode.setName(fieldName); + nodesToReturn.add(recordNode); + } else if (dataType instanceof ChoiceDataType) { + ChoiceDataType choiceDataType = (ChoiceDataType) dataType; + List possibleTypes = choiceDataType.getPossibleSubTypes(); + nodesToReturn = choiceToInfosetNode(possibleTypes, values, logger); + } else if (dataType instanceof ArrayDataType) { + DataType arrayMemberType = ((ArrayDataType) dataType).getElementType(); + Object[] arrValues; + // This was added for XML specifically; an XML reader won't know if an element was originally + // an array of length 1 or just an element. If the dataType is an array, then try to parse the + // data like it was part of an array of length 1. + if (optValue.get() instanceof Object[]) { + arrValues = (Object[]) optValue.get(); + } else { + arrValues = new Object[1]; + arrValues[0] = optValue.get(); + } + List arrNodes = new ArrayList<>(); + for (Object arrMember: arrValues) { + /* + * recordValueToNode normally produces a wrapper InfosetNode that has no name and just some value, + * which normally represents an Array value pretty accurately. + * + * However, RecordInputter will not accept any wrapper elements other than the root; it needs + * the actual data immediately. So, whenever an Array of Records is present, we convert it into + * a List of Infoset Nodes, each of which has the Array's name and are assigned the data of what + * used to be the wrapper nodes. This is exactly what Arrays look like in an XML infoset. 
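+                 * For example (illustrative): an array field named "num" with two Record members becomes two
+                 * sibling InfosetNodes both named "num", mirroring repeated <num>...</num> elements in XML.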
+ */ + Map subMap = new HashMap<>(); + subMap.put("", arrMember); + for (InfosetNode recordNode: recordValueToNode("", arrayMemberType, subMap, logger)) { + recordNode.setName(fieldName); + arrNodes.add(recordNode); + } + } + nodesToReturn = arrNodes; + } else { + InfosetNode childNode = new InfosetNode(fieldName, false); + childNode.setValue(optValue.get().toString()); + nodesToReturn.add(childNode); + } + return nodesToReturn; + } + + /** + * Given a list of possible sub-types, extracts a value or values that is intended to be the result of a prior + * choice parse and constructs one or more InfosetNodes encompassing these values + * As with parse, this may be more than one value because NiFi Choices for RecordSchemas + * must be of all RecordDataTypes, which may have multiple fields + * @param possibleSubTypes a list of possible sub-types the Choice may take on + * @param values a map for which one or several of the values will be extracted to comprise this Choice + * InfosetNode. + * @param logger a log component for debugging + * @return the List of InfosetNodes generated from processing the selected Choice Option's values + * @throws DaffodilProcessingException if one of the possibleSubTypes is not a RecordDataType, or if none + * of the choice options are successfully selected. + */ + private static List choiceToInfosetNode(List possibleSubTypes, Map values, + ComponentLog logger) throws DaffodilProcessingException { + List children = new ArrayList<>(); + for (DataType possibleType: possibleSubTypes) { + if (!(possibleType instanceof RecordDataType)) { + throw new DaffodilProcessingException("Possible Type of Choice element was not a record!"); + } else { + RecordDataType possRecordType = (RecordDataType) possibleType; + List allFields = possRecordType.getChildSchema().getFields(); + // fieldsPresent will contain *all* fields we are able to find, whereas allFound + // will only be false if a required (non-optional) field is not found. + List fieldsPresent = new ArrayList<>(); + boolean allFound = true; + for (RecordField field: allFields) { + if (values.keySet().stream().noneMatch(name -> name.equals(field.getFieldName()))) { + if (!(field instanceof OptionalRecordField)) { + allFound = false; + break; + } + } else { + fieldsPresent.add(field); + } + } + if (allFound) { + for (RecordField field: fieldsPresent) { + // recordValueToNode may still return multiple InfosetNodes because of nested choices! 
+ // (hence why we have to do addAll, not just add here) + children.addAll(recordValueToNode(field.getFieldName(), field.getDataType(), values, logger)); + } + } + } + } + if (children.isEmpty()) { + throw new DaffodilProcessingException( + String.format("InfosetNode Child List %s did not match any choice option of choice %s", + possibleSubTypes.toString(), values + ) + ); + } + return children; + } + + @Override + public String getMimeType() { + return "text/plain"; + } + + @Override + public void flush() throws IOException { + outputStream.flush(); + } + + @Override + public void close() throws IOException { + outputStream.close(); + } +} diff --git a/nifi-daffodil-processors/src/main/java/com/tresys/nifi/controllers/DaffodilWriter.java b/nifi-daffodil-processors/src/main/java/com/tresys/nifi/controllers/DaffodilWriter.java new file mode 100644 index 0000000..250e1b1 --- /dev/null +++ b/nifi-daffodil-processors/src/main/java/com/tresys/nifi/controllers/DaffodilWriter.java @@ -0,0 +1,37 @@ +package com.tresys.nifi.controllers; + +import com.tresys.nifi.util.DaffodilCompileException; +import com.tresys.nifi.util.DaffodilResources.DataProcessorSchemaPair; +import org.apache.nifi.annotation.documentation.CapabilityDescription; +import org.apache.nifi.annotation.documentation.Tags; +import org.apache.nifi.logging.ComponentLog; +import org.apache.nifi.processor.exception.ProcessException; +import org.apache.nifi.serialization.RecordSetWriter; +import org.apache.nifi.serialization.RecordSetWriterFactory; +import org.apache.nifi.serialization.record.RecordSchema; + +import java.io.OutputStream; +import java.util.Map; + +@Tags({"xml", "json", "daffodil", "dfdl", "schema", "xsd"}) +@CapabilityDescription("Use Daffodil and a user-specified DFDL schema to unparse a Daffodil Infoset into data") +public class DaffodilWriter extends AbstractDaffodilController implements RecordSetWriterFactory { + + @Override + public RecordSchema getSchema(Map variables, RecordSchema readSchema) { + return readSchema; + } + + @Override + public RecordSetWriter createWriter(ComponentLog logger, RecordSchema schema, + OutputStream out, Map variables) { + try { + DataProcessorSchemaPair pair = getPair(variables); + return new DaffodilRecordSetWriter(pair.getDataProcessor(), out, pair.getSchema(), streamMode, logger); + } catch (DaffodilCompileException exception) { + getLogger().error("Unable to obtain DataProcessor and/or Schema due to {}", new Object[]{exception.getMessage()}); + throw new ProcessException("Unable to obtain DataProcessor and/or Schema", exception); + } + } + +} diff --git a/nifi-daffodil-processors/src/main/java/com/tresys/nifi/infoset/InfosetNode.java b/nifi-daffodil-processors/src/main/java/com/tresys/nifi/infoset/InfosetNode.java new file mode 100644 index 0000000..17213cf --- /dev/null +++ b/nifi-daffodil-processors/src/main/java/com/tresys/nifi/infoset/InfosetNode.java @@ -0,0 +1,104 @@ +/* + * Copyright 2020 Nteligen, LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.tresys.nifi.infoset; + +import java.util.Arrays; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.Optional; + +public class InfosetNode implements Iterable { + private String name, value; + private final LinkedList children; + public final boolean isArray; + private final static char[][] possibleEnclosingChars = {{'{', '}'}, {'[', ']'}}; + + public InfosetNode(String name, boolean isArray) { + this.name = name; + this.children = new LinkedList<>(); + this.value = null; + this.isArray = isArray; + } + + public InfosetNode(String name) { + this(name, false); + } + + public String getName() { + return name; + } + + public void setName(String newName) { + if (this.name.isEmpty() && newName != null && !newName.isEmpty()) { + name = newName; + } + } + + public String getValue() { + return value; + } + + public void setValue(String newValue) { + if (value != null) { + throw new IllegalArgumentException("Value already set to: " + value); + } else { + value = newValue; + } + } + + public void addChild(InfosetNode newChild) { + children.add(newChild); + } + + @Override + public Iterator iterator() { + return children.iterator(); + } + + public String childrenToString() { + return Arrays.toString(children.stream().map(InfosetNode::getName).toArray()); + } + + public Optional getChild(String childName) { + return children.stream().filter(child -> child.getName().equals(childName)).findAny(); + } + + public String toString() { + return toStringHelper(this, ""); + } + + private static String toStringHelper(InfosetNode node, String tabs) { + StringBuilder toReturn = new StringBuilder(tabs); + toReturn.append(node.name); + toReturn.append(possibleEnclosingChars[node.isArray ? 1 : 0][0]).append("\n"); + if (node.value != null) { + toReturn.append(tabs).append("\t").append(node.value); + if (!node.children.isEmpty()) { + toReturn.append(","); + } + toReturn.append("\n"); + } + if (!node.children.isEmpty()) { + for(InfosetNode child: node.children) { + toReturn.append(toStringHelper(child, tabs + "\t")); + } + } + toReturn.append(tabs).append(possibleEnclosingChars[node.isArray ? 1 : 0][1]).append("\n"); + return toReturn.toString(); + } + +} diff --git a/nifi-daffodil-processors/src/main/java/com/tresys/nifi/processors/AbstractDaffodilProcessor.java b/nifi-daffodil-processors/src/main/java/com/tresys/nifi/processors/AbstractDaffodilProcessor.java index 09c3a46..2a95dab 100644 --- a/nifi-daffodil-processors/src/main/java/com/tresys/nifi/processors/AbstractDaffodilProcessor.java +++ b/nifi-daffodil-processors/src/main/java/com/tresys/nifi/processors/AbstractDaffodilProcessor.java @@ -1,5 +1,5 @@ /* - * Copyright 2018 Tresys Technology, LLC + * Copyright 2020 Tresys Technology, LLC * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -16,127 +16,57 @@ package com.tresys.nifi.processors; -import java.io.File; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.HashSet; -import java.util.List; -import java.util.Objects; -import java.util.Set; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.TimeUnit; - +import com.tresys.nifi.util.DaffodilResources; +import org.apache.daffodil.japi.DataProcessor; import org.apache.nifi.annotation.lifecycle.OnScheduled; -import org.apache.nifi.components.PropertyDescriptor; import org.apache.nifi.components.AllowableValue; -import org.apache.nifi.logging.ComponentLog; -import org.apache.nifi.flowfile.attributes.CoreAttributes; +import org.apache.nifi.components.PropertyDescriptor; import org.apache.nifi.flowfile.FlowFile; +import org.apache.nifi.flowfile.attributes.CoreAttributes; +import org.apache.nifi.logging.ComponentLog; import org.apache.nifi.processor.AbstractProcessor; import org.apache.nifi.processor.ProcessContext; import org.apache.nifi.processor.ProcessSession; import org.apache.nifi.processor.ProcessorInitializationContext; import org.apache.nifi.processor.Relationship; -import org.apache.nifi.processor.util.StandardValidators; import org.apache.nifi.processor.exception.ProcessException; -import org.apache.nifi.processor.io.StreamCallback; import org.apache.nifi.util.StopWatch; -import com.google.common.cache.CacheBuilder; -import com.google.common.cache.CacheLoader; -import com.google.common.cache.LoadingCache; - -import org.apache.daffodil.japi.Compiler; -import org.apache.daffodil.japi.Daffodil; -import org.apache.daffodil.japi.DataProcessor; -import org.apache.daffodil.japi.Diagnostic; -import org.apache.daffodil.japi.ProcessorFactory; -import org.apache.daffodil.japi.WithDiagnostics; -import org.apache.daffodil.japi.ValidationMode; -import org.apache.daffodil.japi.InvalidUsageException; -import org.apache.daffodil.japi.InvalidParserException; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.concurrent.TimeUnit; +import static com.tresys.nifi.util.DaffodilResources.*; public abstract class AbstractDaffodilProcessor extends AbstractProcessor { - abstract protected void processWithDaffodil(final DataProcessor dp, final FlowFile ff, final InputStream in, final OutputStream out, String infosetType) throws IOException; + abstract protected void processWithDaffodil(final DataProcessor dp, + final FlowFile ff, final InputStream in, final OutputStream out, + String infosetType) throws IOException; /** * Returns the mime type of the resulting output FlowFile. If the mime type * cannot be determine, this should return null, and the mime.type * attribute will be removed from the output FlowFile. 
*/ - abstract protected String getOutputMimeType(String infosetType); - abstract protected boolean isUnparse(); - - public static final PropertyDescriptor DFDL_SCHEMA_FILE = new PropertyDescriptor.Builder() - .name("dfdl-schema-file") - .displayName("DFDL Schema File") - .description("Full path to the DFDL schema file that is to be used for parsing/unparsing.") - .required(true) - .expressionLanguageSupported(true) - .addValidator(StandardValidators.FILE_EXISTS_VALIDATOR) - .build(); - - public static final PropertyDescriptor PRE_COMPILED_SCHEMA = new PropertyDescriptor.Builder() - .name("pre-compiled-schema") - .displayName("Pre-compiled Schema") - .description("Specify whether the DFDL Schema File is a pre-compiled schema that can be reloaded or if it is an XML schema that needs to be compiled. Set to true if it is pre-compiled.") - .required(true) - .defaultValue("false") - .allowableValues("true", "false") - .expressionLanguageSupported(true) - .addValidator(StandardValidators.BOOLEAN_VALIDATOR) - .build(); - - static final String XML_MIME_TYPE = "application/xml"; - static final String JSON_MIME_TYPE = "application/json"; + protected abstract String getOutputMimeType(String infosetType); + protected abstract boolean isUnparse(); - static final String XML_VALUE = "xml"; - static final String JSON_VALUE = "json"; - static final String ATTRIBUTE_VALUE = "use mime.type attribute"; - - static final AllowableValue INFOSET_TYPE_XML = new AllowableValue(XML_VALUE, XML_VALUE, "The FlowFile representation is XML"); - static final AllowableValue INFOSET_TYPE_JSON = new AllowableValue(JSON_VALUE, JSON_VALUE, "The FlowFile representation is JSON"); - static final AllowableValue INFOSET_TYPE_ATTRIBUTE = new AllowableValue(ATTRIBUTE_VALUE, ATTRIBUTE_VALUE, "The FlowFile representation is determined based on the mime.type attribute"); - - public static final PropertyDescriptor CACHE_SIZE = new PropertyDescriptor.Builder() - .name("cache-size") - .displayName("Cache Size") - .description("Maximum number of compiled DFDL schemas to cache. 
Zero disables the cache.") - .required(true) - .defaultValue("50") - .addValidator(StandardValidators.NON_NEGATIVE_INTEGER_VALIDATOR) - .build(); - - public static final PropertyDescriptor CACHE_TTL_AFTER_LAST_ACCESS = new PropertyDescriptor.Builder() - .name("cache-ttl-after-last-access") - .displayName("Cache TTL After Last Access") - .description("The cache TTL (time-to-live) or how long to keep compiled DFDL schemas in the cache after last access.") - .required(true) - .defaultValue("30 mins") - .addValidator(StandardValidators.TIME_PERIOD_VALIDATOR) + static final Relationship REL_SUCCESS = new Relationship.Builder() + .name("success") + .description("FlowFiles that are successfully transformed will be routed to this relationship") .build(); - - static final String OFF_VALUE = "off"; - static final String LIMITED_VALUE = "limited"; - static final String FULL_VALUE = "full"; - - static final AllowableValue VALIDATION_MODE_OFF = new AllowableValue(OFF_VALUE, OFF_VALUE, "Disable infoset validation"); - static final AllowableValue VALIDATION_MODE_LIMITED= new AllowableValue(LIMITED_VALUE, LIMITED_VALUE, "Facet/restriction validation using Daffodil"); - static final AllowableValue VALIDATION_MODE_FULL = new AllowableValue(FULL_VALUE, FULL_VALUE, "Full schema validation using Xerces"); - - public static final PropertyDescriptor VALIDATION_MODE = new PropertyDescriptor.Builder() - .name("validation-mode") - .displayName("Validation Mode") - .description("The type of validation to be performed on the infoset.") - .required(true) - .defaultValue(OFF_VALUE) - .allowableValues(VALIDATION_MODE_OFF, VALIDATION_MODE_LIMITED, VALIDATION_MODE_FULL) + static final Relationship REL_FAILURE = new Relationship.Builder() + .name("failure") + .description("If a FlowFile cannot be transformed from the configured input format to the configured output format, " + + "the unchanged FlowFile will be routed to this relationship") .build(); /** @@ -146,79 +76,35 @@ public abstract class AbstractDaffodilProcessor extends AbstractProcessor { * create this property descriptor accordingly. 
*/ private PropertyDescriptor INFOSET_TYPE = null; - - - public static final Relationship REL_SUCCESS = new Relationship.Builder() - .name("success") - .description("When a parse/unparse is successful, the resulting FlowFile is routed to this relationship") - .build(); - - public static final Relationship REL_FAILURE = new Relationship.Builder() - .name("failure") - .description("When a parse/unparse fails, it will be routed to this relationship") - .build(); + private List<PropertyDescriptor> properties; @Override protected void init(final ProcessorInitializationContext context) { - List<AllowableValue> allowableInfosetTypeValues = new ArrayList(Arrays.asList(INFOSET_TYPE_XML, INFOSET_TYPE_JSON)); + List<AllowableValue> allowableInfosetTypeValues = new ArrayList<>(Arrays.asList(INFOSET_TYPE_XML, INFOSET_TYPE_JSON)); if (isUnparse()) { // using the mime type for infoset type only applies to unparse allowableInfosetTypeValues.add(INFOSET_TYPE_ATTRIBUTE); } INFOSET_TYPE = new PropertyDescriptor.Builder() - .name("infoset-type") - .displayName("Infoset Type") - .description("The format of the FlowFile to output (for parsing) or input (for unparsing).") - .required(true) - .defaultValue(INFOSET_TYPE_XML.getValue()) - .allowableValues(allowableInfosetTypeValues.toArray(new AllowableValue[allowableInfosetTypeValues.size()])) - .build(); - - final List<PropertyDescriptor> properties = new ArrayList<>(); - properties.add(DFDL_SCHEMA_FILE); - properties.add(PRE_COMPILED_SCHEMA); + .name("infoset-type") + .displayName("Infoset Type") + .description("The format of the FlowFile to output (for parsing) or input (for unparsing).") + .required(true) + .defaultValue(INFOSET_TYPE_XML.getValue()) + .allowableValues(allowableInfosetTypeValues.toArray(new AllowableValue[0])) + .build(); + + final List<PropertyDescriptor> properties = new ArrayList<>(DaffodilResources.daffodilProperties); properties.add(INFOSET_TYPE); - properties.add(VALIDATION_MODE); - properties.add(CACHE_SIZE); - properties.add(CACHE_TTL_AFTER_LAST_ACCESS); this.properties = Collections.unmodifiableList(properties); - - final Set<Relationship> relationships = new HashSet<>(); - relationships.add(REL_SUCCESS); - relationships.add(REL_FAILURE); - this.relationships = Collections.unmodifiableSet(relationships); - } - - private List<PropertyDescriptor> properties; - private Set<Relationship> relationships; - private LoadingCache<CacheKey, DataProcessor> cache; - - static class CacheKey { - public String dfdlSchema; - public Boolean preCompiled; - - public CacheKey(String dfdlSchema, Boolean preCompiled) { - this.dfdlSchema = dfdlSchema; - this.preCompiled = preCompiled; - } - - public int hashCode() { - return Objects.hash(dfdlSchema, preCompiled); - } - - public boolean equals(Object obj) { - if (!(obj instanceof CacheKey)) return false; - if (obj == this) return true; - - CacheKey that = (CacheKey)obj; - return Objects.equals(this.dfdlSchema, that.dfdlSchema) && - Objects.equals(this.preCompiled, that.preCompiled); - } } @Override public Set<Relationship> getRelationships() { + final Set<Relationship> relationships = new HashSet<>(); + relationships.add(REL_SUCCESS); + relationships.add(REL_FAILURE); return relationships; } @@ -227,69 +113,26 @@ protected List<PropertyDescriptor> getSupportedPropertyDescriptors() { return properties; } - private DataProcessor newDaffodilDataProcessor(String dfdlSchema, Boolean preCompiled) throws IOException { - File f = new File(dfdlSchema); - Compiler c = Daffodil.compiler(); - DataProcessor dp; - if (preCompiled) { - try { - dp = c.reload(f); - } catch (InvalidParserException e) { - getLogger().error("Failed to reload pre-compiled DFDL schema: " + dfdlSchema + ". 
" + e.getMessage()); - throw new DaffodilCompileException("Failed to reload pre-compiled DFDL schema: " + dfdlSchema + ". " + e.getMessage()); - } - } else { - ProcessorFactory pf = c.compileFile(f); - if (pf.isError()) { - getLogger().error("Failed to compile DFDL schema: " + dfdlSchema); - logDiagnostics(pf); - throw new DaffodilCompileException("Failed to compile DFDL schema: " + dfdlSchema); - } - dp = pf.onPath("/"); - if (dp.isError()) { - getLogger().error("Failed to compile DFDL schema: " + dfdlSchema); - logDiagnostics(dp); - throw new DaffodilCompileException("Failed to compile DFDL schema: " + dfdlSchema); - } - } - return dp; - } - - protected DataProcessor getDataProcessor(String dfdlSchema, Boolean preCompiled) throws IOException { - if (cache != null) { - try { - return cache.get(new CacheKey(dfdlSchema, preCompiled)); - } catch (ExecutionException e) { - throw new IOException(e); - } - } else { - return newDaffodilDataProcessor(dfdlSchema, preCompiled); - } + @Override + protected PropertyDescriptor getSupportedDynamicPropertyDescriptor(final String propertyDescriptorName) { + return DaffodilResources.getSupportedDynamicPropertyDescriptor(propertyDescriptorName); } @OnScheduled public void onScheduled(final ProcessContext context) { - final ComponentLog logger = getLogger(); - final Integer cacheSize = context.getProperty(CACHE_SIZE).asInteger(); - final Long cacheTTL = context.getProperty(CACHE_TTL_AFTER_LAST_ACCESS).asTimePeriod(TimeUnit.SECONDS); + DaffodilResources.buildCache(getLogger(), context); + } - if (cacheSize > 0) { - CacheBuilder cacheBuilder = CacheBuilder.newBuilder().maximumSize(cacheSize); - if (cacheTTL > 0) { - cacheBuilder = cacheBuilder.expireAfterAccess(cacheTTL, TimeUnit.SECONDS); - } + static final String XML_MIME_TYPE = "application/xml"; + static final String JSON_MIME_TYPE = "application/json"; - cache = cacheBuilder.build( - new CacheLoader() { - public DataProcessor load(CacheKey key) throws IOException { - return newDaffodilDataProcessor(key.dfdlSchema, key.preCompiled); - } - }); - } else { - cache = null; - logger.warn("Daffodil data processor cache disabled because cache size is set to 0."); - } - } + static final String XML_VALUE = "xml"; + static final String JSON_VALUE = "json"; + static final String ATTRIBUTE_VALUE = "use mime.type attribute"; + + static final AllowableValue INFOSET_TYPE_XML = new AllowableValue(XML_VALUE, XML_VALUE, "The FlowFile representation is XML"); + static final AllowableValue INFOSET_TYPE_JSON = new AllowableValue(JSON_VALUE, JSON_VALUE, "The FlowFile representation is JSON"); + static final AllowableValue INFOSET_TYPE_ATTRIBUTE = new AllowableValue(ATTRIBUTE_VALUE, ATTRIBUTE_VALUE, "The FlowFile representation is determined based on the mime.type attribute"); @Override public void onTrigger(final ProcessContext context, final ProcessSession session) { @@ -300,22 +143,14 @@ public void onTrigger(final ProcessContext context, final ProcessSession session final ComponentLog logger = getLogger(); final StopWatch stopWatch = new StopWatch(true); - final String dfdlSchema = context.getProperty(DFDL_SCHEMA_FILE).evaluateAttributeExpressions(original).getValue(); - final Boolean preCompiled = context.getProperty(PRE_COMPILED_SCHEMA).evaluateAttributeExpressions(original).asBoolean(); + final String dfdlSchema = context.getProperty(DaffodilResources.DFDL_SCHEMA_FILE).evaluateAttributeExpressions(original).getValue(); String infosetTypeValue = context.getProperty(INFOSET_TYPE).getValue(); final String infosetType; - 
final ValidationMode validationMode; - - switch (context.getProperty(VALIDATION_MODE).getValue()) { - case OFF_VALUE: validationMode = ValidationMode.Off; break; - case LIMITED_VALUE: validationMode = ValidationMode.Limited; break; - case FULL_VALUE: validationMode = ValidationMode.Full; break; - default: throw new AssertionError("validation mode was not one of 'off', 'limited', or 'full'"); - } if (infosetTypeValue.equals(ATTRIBUTE_VALUE)) { - if (!isUnparse()) { throw new AssertionError("infoset type 'attribute' should only occur with Daffodil unparse"); } - + if (!isUnparse()) { + throw new AssertionError("infoset type 'attribute' should only occur with Daffodil unparse"); + } String inputMimeType = original.getAttribute(CoreAttributes.MIME_TYPE.key()); switch (inputMimeType == null ? "" : inputMimeType) { case XML_MIME_TYPE: infosetType = XML_VALUE; break; @@ -330,18 +165,14 @@ public void onTrigger(final ProcessContext context, final ProcessSession session } try { - FlowFile output = session.write(original, new StreamCallback() { - @Override - public void process(final InputStream in, final OutputStream out) throws IOException { - DataProcessor dp = getDataProcessor(dfdlSchema, preCompiled); - try { - dp.setValidationMode(validationMode); - } catch (InvalidUsageException e) { - throw new IOException(e); - } + FlowFile output = session.write(original, + (in, out) -> { + DataProcessorSchemaPair pair = + DaffodilResources.getDataProcessorSchemaPair(getLogger(), context, dfdlSchema); + DataProcessor dp = pair.getDataProcessor(); processWithDaffodil(dp, original, in, out, infosetType); } - }); + ); final String outputMimeType = getOutputMimeType(infosetType); if (outputMimeType != null) { output = session.putAttribute(output, CoreAttributes.MIME_TYPE.key(), outputMimeType); @@ -357,16 +188,4 @@ public void process(final InputStream in, final OutputStream out) throws IOExcep } } - protected void logDiagnostics(WithDiagnostics withDiags) { - final ComponentLog logger = getLogger(); - final List diags = withDiags.getDiagnostics(); - for (Diagnostic diag : diags) { - String message = diag.toString(); - if (diag.isError()) { - logger.error(message); - } else { - logger.warn(message); - } - } - } } diff --git a/nifi-daffodil-processors/src/main/java/com/tresys/nifi/processors/DaffodilParse.java b/nifi-daffodil-processors/src/main/java/com/tresys/nifi/processors/DaffodilParse.java index 86ed129..3d7e304 100644 --- a/nifi-daffodil-processors/src/main/java/com/tresys/nifi/processors/DaffodilParse.java +++ b/nifi-daffodil-processors/src/main/java/com/tresys/nifi/processors/DaffodilParse.java @@ -19,11 +19,9 @@ import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; -import java.io.OutputStreamWriter; -import java.io.Writer; -import java.nio.channels.Channels; -import java.nio.channels.ReadableByteChannel; +import com.tresys.nifi.util.DaffodilProcessingException; +import com.tresys.nifi.util.DaffodilResources; import org.apache.nifi.annotation.behavior.EventDriven; import org.apache.nifi.annotation.behavior.InputRequirement; import org.apache.nifi.annotation.behavior.InputRequirement.Requirement; @@ -73,23 +71,26 @@ protected String getOutputMimeType(String infosetType) { } @Override - protected void processWithDaffodil(final DataProcessor dp, final FlowFile ff, final InputStream in, final OutputStream out, String infosetType) throws IOException { + protected void processWithDaffodil(final DataProcessor dp, final FlowFile ff, final InputStream in, + final OutputStream 
out, String infosetType) throws IOException { InputSourceDataInputStream input = new InputSourceDataInputStream(in); InfosetOutputter outputter = getInfosetOutputter(infosetType, out); ParseResult pr = dp.parse(input, outputter); if (pr.isError()) { getLogger().error("Failed to parse {}", new Object[]{ff}); - logDiagnostics(pr); + DaffodilResources.logDiagnostics(getLogger(), pr); throw new DaffodilProcessingException("Failed to parse"); } DataLocation loc = pr.location(); long bitsRead = loc.bitPos1b() - 1; long expectedBits = ff.getSize() * 8; if (expectedBits != bitsRead) { - getLogger().error("Left over data. Consumed {} bit(s) with {} bit(s) remaining when parsing {}", new Object[]{bitsRead, expectedBits - bitsRead, ff}); + getLogger().error( + "Left over data. Consumed {} bit(s) with {} bit(s) remaining when parsing {}", + new Object[]{bitsRead, expectedBits - bitsRead, ff} + ); throw new DaffodilProcessingException("Left over data found"); } out.flush(); } - } diff --git a/nifi-daffodil-processors/src/main/java/com/tresys/nifi/processors/DaffodilUnparse.java b/nifi-daffodil-processors/src/main/java/com/tresys/nifi/processors/DaffodilUnparse.java index 770f286..9a5208d 100644 --- a/nifi-daffodil-processors/src/main/java/com/tresys/nifi/processors/DaffodilUnparse.java +++ b/nifi-daffodil-processors/src/main/java/com/tresys/nifi/processors/DaffodilUnparse.java @@ -16,14 +16,13 @@ package com.tresys.nifi.processors; -import java.io.IOException; import java.io.InputStream; -import java.io.InputStreamReader; import java.io.OutputStream; -import java.io.Reader; import java.nio.channels.Channels; import java.nio.channels.WritableByteChannel; +import com.tresys.nifi.util.DaffodilProcessingException; +import com.tresys.nifi.util.DaffodilResources; import org.apache.nifi.annotation.behavior.EventDriven; import org.apache.nifi.annotation.behavior.InputRequirement; import org.apache.nifi.annotation.behavior.InputRequirement.Requirement; @@ -46,14 +45,14 @@ @InputRequirement(Requirement.INPUT_REQUIRED) @Tags({"xml", "json", "daffodil", "dfdl", "schema", "xsd"}) @CapabilityDescription("Use Daffodil and a user-specified DFDL schema to transform an XML or JSON representation of data back to the original data format.") -@WritesAttribute(attribute = "mime.type", description = "If the FlowFile is successfully unparsed, this attriute is removed, as the MIME Type is no longer known.") +@WritesAttribute(attribute = "mime.type", description = "If the FlowFile is successfully unparsed, this attribute is removed, as the MIME Type is no longer known.") public class DaffodilUnparse extends AbstractDaffodilProcessor { private InfosetInputter getInfosetInputter(String infosetType, InputStream is) { switch (infosetType) { case XML_VALUE: return new XMLTextInfosetInputter(is); case JSON_VALUE: return new JsonInfosetInputter(is); - default: throw new AssertionError("Unhandled infoset type: " + infosetType); + default: throw new IllegalArgumentException("Unhandled infoset type: " + infosetType); } } @@ -63,7 +62,7 @@ private InfosetInputter getInfosetInputter(String infosetType, InputStream is) { /** * The resulting output mime type of an unparse action cannot be known * since it is entirely based on the DFDL schema. Since we do not know the - * mime type, return null. This will signifiy to the abstract daffodil + * mime type, return null. This will signify to the abstract daffodil * processor that the mime.type attribute should be removed from the output * FlowFile. 
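* (By contrast, a parse produces an XML or JSON infoset, so DaffodilParse can report a concrete mime type for its output.) 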
*/ @@ -73,16 +72,15 @@ protected String getOutputMimeType(String infosetType) { } @Override - protected void processWithDaffodil(final DataProcessor dp, final FlowFile ff, final InputStream in, final OutputStream out, String infosetType) throws IOException { + protected void processWithDaffodil(final DataProcessor dp, final FlowFile ff, final InputStream in, + final OutputStream out, String infosetType) throws DaffodilProcessingException { InfosetInputter inputter = getInfosetInputter(infosetType, in); WritableByteChannel wbc = Channels.newChannel(out); UnparseResult ur = dp.unparse(inputter, wbc); if (ur.isError()) { getLogger().error("Failed to unparse {}", new Object[]{ff}); - logDiagnostics(ur); + DaffodilResources.logDiagnostics(getLogger(), ur); throw new DaffodilProcessingException("Failed to unparse"); } } - } - diff --git a/nifi-daffodil-processors/src/main/java/com/tresys/nifi/processors/DaffodilCompileException.java b/nifi-daffodil-processors/src/main/java/com/tresys/nifi/util/DaffodilCompileException.java similarity index 64% rename from nifi-daffodil-processors/src/main/java/com/tresys/nifi/processors/DaffodilCompileException.java rename to nifi-daffodil-processors/src/main/java/com/tresys/nifi/util/DaffodilCompileException.java index 440874d..cac6d2b 100644 --- a/nifi-daffodil-processors/src/main/java/com/tresys/nifi/processors/DaffodilCompileException.java +++ b/nifi-daffodil-processors/src/main/java/com/tresys/nifi/util/DaffodilCompileException.java @@ -1,10 +1,9 @@ /* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at + * Copyright 2020 Tresys Technology, LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * @@ -15,7 +14,7 @@ * limitations under the License. */ -package com.tresys.nifi.processors; +package com.tresys.nifi.util; import java.io.IOException; diff --git a/nifi-daffodil-processors/src/main/java/com/tresys/nifi/processors/DaffodilProcessingException.java b/nifi-daffodil-processors/src/main/java/com/tresys/nifi/util/DaffodilProcessingException.java similarity index 64% rename from nifi-daffodil-processors/src/main/java/com/tresys/nifi/processors/DaffodilProcessingException.java rename to nifi-daffodil-processors/src/main/java/com/tresys/nifi/util/DaffodilProcessingException.java index e3283dc..c989fe0 100644 --- a/nifi-daffodil-processors/src/main/java/com/tresys/nifi/processors/DaffodilProcessingException.java +++ b/nifi-daffodil-processors/src/main/java/com/tresys/nifi/util/DaffodilProcessingException.java @@ -1,10 +1,9 @@ /* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at + * Copyright 2020 Tresys Technology, LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * @@ -15,7 +14,7 @@ * limitations under the License. */ -package com.tresys.nifi.processors; +package com.tresys.nifi.util; import java.io.IOException; diff --git a/nifi-daffodil-processors/src/main/java/com/tresys/nifi/util/DaffodilResources.java b/nifi-daffodil-processors/src/main/java/com/tresys/nifi/util/DaffodilResources.java new file mode 100644 index 0000000..1471732 --- /dev/null +++ b/nifi-daffodil-processors/src/main/java/com/tresys/nifi/util/DaffodilResources.java @@ -0,0 +1,374 @@ +package com.tresys.nifi.util; + +import com.google.common.cache.CacheBuilder; +import com.google.common.cache.CacheLoader; +import com.google.common.cache.LoadingCache; +import com.tresys.nifi.schema.RecordWalker; +import org.apache.daffodil.japi.Compiler; +import org.apache.daffodil.japi.Daffodil; +import org.apache.daffodil.japi.DataProcessor; +import org.apache.daffodil.japi.Diagnostic; +import org.apache.daffodil.japi.InvalidUsageException; +import org.apache.daffodil.japi.ProcessorFactory; +import org.apache.daffodil.japi.ValidationMode; +import org.apache.daffodil.japi.WithDiagnostics; +import org.apache.nifi.components.AllowableValue; +import org.apache.nifi.components.PropertyDescriptor; +import org.apache.nifi.context.PropertyContext; +import org.apache.nifi.expression.ExpressionLanguageScope; +import org.apache.nifi.logging.ComponentLog; +import org.apache.nifi.processor.ProcessContext; +import org.apache.nifi.processor.exception.ProcessException; +import org.apache.nifi.processor.util.StandardValidators; +import org.apache.nifi.serialization.record.RecordSchema; +import scala.Predef; +import scala.collection.JavaConverters; + +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.TimeUnit; +import java.util.stream.Collectors; + +/** + * This class holds a variety of Properties and static methods that are used by both the + * Daffodil Controllers and Processors. Many of these interact with a Cache that holds + * DataProcessor objects so that a given configuration with a Schema need not be re-compiled + * every time a Parse/Unparse is performed. + */ +public class DaffodilResources { + + public static final PropertyDescriptor DFDL_SCHEMA_FILE = new PropertyDescriptor.Builder() + .name("dfdl-schema-file") + .displayName("DFDL Schema File") + .description("Full path to the DFDL schema file that is to be used for parsing/unparsing.") + .required(true) + .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES) + .addValidator(StandardValidators.FILE_EXISTS_VALIDATOR) + .build(); + + public static final PropertyDescriptor CACHE_SIZE = new PropertyDescriptor.Builder() + .name("cache-size") + .displayName("Cache Size") + .description("Maximum number of compiled DFDL schemas to cache. 
Zero disables the cache.") + .required(true) + .defaultValue("50") + .addValidator(StandardValidators.NON_NEGATIVE_INTEGER_VALIDATOR) + .build(); + + public static final PropertyDescriptor CACHE_TTL_AFTER_LAST_ACCESS = new PropertyDescriptor.Builder() + .name("cache-ttl-after-last-access") + .displayName("Cache TTL After Last Access") + .description("The cache TTL (time-to-live) or how long to keep compiled DFDL schemas in the cache after last access.") + .required(true) + .defaultValue("30 mins") + .addValidator(StandardValidators.TIME_PERIOD_VALIDATOR) + .build(); + + public static final String OFF_VALUE = "off"; + public static final String LIMITED_VALUE = "limited"; + public static final String FULL_VALUE = "full"; + + public static final AllowableValue VALIDATION_MODE_OFF + = new AllowableValue(OFF_VALUE, OFF_VALUE, "Disable infoset validation"); + public static final AllowableValue VALIDATION_MODE_LIMITED + = new AllowableValue(LIMITED_VALUE, LIMITED_VALUE, "Facet/restriction validation using Daffodil"); + public static final AllowableValue VALIDATION_MODE_FULL + = new AllowableValue(FULL_VALUE, FULL_VALUE, "Full schema validation using Xerces"); + + public static final PropertyDescriptor VALIDATION_MODE = new PropertyDescriptor.Builder() + .name("validation-mode") + .displayName("Validation Mode") + .description("The type of validation to be performed on the infoset.") + .required(true) + .defaultValue(OFF_VALUE) + .allowableValues(VALIDATION_MODE_OFF, VALIDATION_MODE_LIMITED, VALIDATION_MODE_FULL) + .build(); + + /** + * Currently only external variables are supported as configuration file items. + */ + public static final PropertyDescriptor CONFIG_FILE = new PropertyDescriptor.Builder() + .name("config_file") + .displayName("DFDL Config File Path") + .description("Path to an XML-based DFDL Configuration file that contains a list of external variables and tunables.") + .required(false) + .addValidator(StandardValidators.FILE_EXISTS_VALIDATOR) + .build(); + + public static PropertyDescriptor getSupportedDynamicPropertyDescriptor(final String propertyDescriptorName) { + if (propertyDescriptorName == null || propertyDescriptorName.replaceAll("\\s+", "").isEmpty()) { + return null; + } else { + boolean isTunable = propertyDescriptorName.startsWith("+"); + String displayName = isTunable ? "Tunable " + propertyDescriptorName.substring(1) : + "External Variable " + propertyDescriptorName; + return new PropertyDescriptor.Builder() + .name(propertyDescriptorName) + .displayName(displayName) + .description("Value to configure a specific " + (isTunable ? "tunable" : "external variable") + " when parsing or unparsing") + .required(false) + .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) + .build(); + } + } + + public static final List daffodilProperties; + public static final List propertyNames; + + static { + final List propertyList = new ArrayList<>(); + propertyList.add(DaffodilResources.DFDL_SCHEMA_FILE); + propertyList.add(DaffodilResources.VALIDATION_MODE); + propertyList.add(DaffodilResources.CACHE_SIZE); + propertyList.add(DaffodilResources.CACHE_TTL_AFTER_LAST_ACCESS); + propertyList.add(DaffodilResources.CONFIG_FILE); + daffodilProperties = Collections.unmodifiableList(propertyList); + propertyNames = daffodilProperties.stream().map(PropertyDescriptor::getName).collect(Collectors.toList()); + } + + /** + * Converts a Java Map to a Scala Map. 
This method is needed to call the withExternalVariables method, + * which is in the Daffodil Java API yet takes Scala objects as parameters. + * @param javaMap a Java Map + * @return the Java Map converted to a Scala Map + */ + public static scala.collection.immutable.Map<String, String> hashMapToScalaMap(Map<String, String> javaMap) { + return JavaConverters.mapAsScalaMapConverter(javaMap).asScala().toMap(Predef.$conforms()); + } + + public static void logDiagnostics(ComponentLog logger, WithDiagnostics diagnostics) { + final List<Diagnostic> diags = diagnostics.getDiagnostics(); + for (Diagnostic diag : diags) { + String message = diag.toString(); + if (diag.isError()) { + logger.error(message); + } else { + logger.warn(message); + } + } + } + + /** + * Gets a NiFi RecordSchema from a passed-in ProcessorFactory object using the Daffodil DSOM API + * and a custom RecordWalker implementation of that API + * @param pf the given ProcessorFactory object + * @return a RecordSchema corresponding to pf + * @throws DaffodilCompileException if the passed-in ProcessorFactory is in an errored state + */ + public static RecordSchema getSchema(ProcessorFactory pf) throws DaffodilCompileException { + if (pf.isError()) { + throw new DaffodilCompileException(pf.getDiagnostics().toString()); + } + RecordWalker walker = new RecordWalker(); + walker.walkFromRoot(pf.experimental().rootView()); + return walker.result(); + } + + /** + * Returns a Pair containing a RecordSchema and a Daffodil DataProcessor, configured + * based on all of the supplied parameters. + * @return a DataProcessorSchemaPair object as described above + * @throws DaffodilCompileException if: + * - the schema file fails to compile + * - one of the properties is not successfully set + * - there is a problem generating a DataProcessor from the ProcessorFactory + */ + public static DataProcessorSchemaPair newDataProcessorSchemaPair(ComponentLog logger, String dfdlSchema, + String validationModeAsString, + HashMap<String, String> extVariableMap, + HashMap<String, String> tunableMap, + String configFile) throws DaffodilCompileException { + File f = new File(dfdlSchema); + Compiler daffodilCompiler = Daffodil.compiler(); + + if (!tunableMap.keySet().isEmpty()) { + daffodilCompiler = daffodilCompiler.withTunables(tunableMap); + } + + ProcessorFactory pf; + try { + pf = daffodilCompiler.compileFile(f); + } catch (IOException ioe) { + throw new DaffodilCompileException("Failed to compile DFDL schema: " + dfdlSchema, ioe); + } + + if (pf.isError()) { + logger.error("Failed to compile DFDL schema: " + dfdlSchema); + logDiagnostics(logger, pf); + throw new DaffodilCompileException("Failed to compile DFDL schema: " + dfdlSchema); + } + RecordSchema schema = getSchema(pf); + + ValidationMode validationMode; + switch (validationModeAsString) { + case DaffodilResources.OFF_VALUE: + validationMode = ValidationMode.Off; + break; + case DaffodilResources.LIMITED_VALUE: + validationMode = ValidationMode.Limited; + break; + case DaffodilResources.FULL_VALUE: + validationMode = ValidationMode.Full; + break; + default: throw new AssertionError("validation mode was not one of 'off', 'limited', or 'full'"); + } + + try { + DataProcessor dp = pf.onPath("/"); + if (dp.isError()) { + throw new DaffodilCompileException("DataProcessor error: " + dp.getDiagnostics().toString()); + } + try { + dp = dp.withValidationMode(validationMode); + if (!extVariableMap.isEmpty()) { + dp = dp.withExternalVariables(hashMapToScalaMap(extVariableMap)); + } + if (configFile != null && !configFile.replaceAll("\\s", "").isEmpty()) { + dp = 
dp.withExternalVariables(new File(configFile)); + } + if (dp.isError()) { + throw new DaffodilCompileException("DataProcessor error: " + dp.getDiagnostics().toString()); + } + return new DataProcessorSchemaPair(dp, schema); + } catch (InvalidUsageException e) { + throw new DaffodilCompileException( + "DataProcessor error when setting validation mode, ext. variables, or config file", e + ); + } + } catch (ProcessException e) { + logger.error("Failed to process due to {}", new Object[]{e}); + throw new DaffodilCompileException("DataProcessor error due to ProcessException", e); + } + } + + /** + * @param context PropertyContext from which various configuration items will be obtained + * @param dfdlSchema path to a DFDL schema file + * @return an existing DataProcessorSchemaPair if one was already generated and cached, otherwise a new DataProcessorSchemaPair + * @throws DaffodilCompileException if creating a new DataProcessorSchemaPair fails + * or a pair expected to be cached is not successfully obtained. + */ + public static DataProcessorSchemaPair getDataProcessorSchemaPair(ComponentLog logger, PropertyContext context, + String dfdlSchema) throws DaffodilCompileException { + final String validationMode = context.getProperty(DaffodilResources.VALIDATION_MODE).getValue(); + final String configFile = context.getProperty(DaffodilResources.CONFIG_FILE).getValue(); + HashMap<String, String> extVariableMap = new HashMap<>(); + HashMap<String, String> tunableMap = new HashMap<>(); + context.getAllProperties().forEach( + (name, value) -> { + // we have to handle these 2 cases explicitly because Infoset Type and Stream Mode are not + // shared, general properties; only the Processors have Infoset Type, and only the Controllers + // have Stream Mode + if (!propertyNames.contains(name) && !"infoset-type".equals(name) && !"stream-mode".equals(name)) { + boolean isTunable = name.startsWith("+"); + // remove the + sign from the tunable variable key + String actualName = isTunable ? 
name.substring(1) : name; + if (isTunable) { + tunableMap.put(actualName, value); + } else { + extVariableMap.put(actualName, value); + } + } + } + ); + if (cache != null) { + try { + return cache.get(new CacheKey( + dfdlSchema, validationMode, extVariableMap, tunableMap, configFile) + ); + } catch (ExecutionException e) { + throw new DaffodilCompileException("Could not obtain Processor from cache", e); + } + } else { + return newDataProcessorSchemaPair(logger, dfdlSchema, validationMode, extVariableMap, tunableMap, configFile); + } + } + + public static LoadingCache<CacheKey, DataProcessorSchemaPair> cache; + + public static class CacheKey { + private final String dfdlSchema; + private final String validationMode; + private final HashMap<String, String> externalVariables; + private final HashMap<String, String> tunableMap; + private final String configFile; + + public CacheKey(String dfdlSchema, String validationMode, HashMap<String, String> externalVariables, + HashMap<String, String> tunableMap, String configFile) { + this.dfdlSchema = dfdlSchema; + this.validationMode = validationMode; + this.externalVariables = externalVariables; + this.tunableMap = tunableMap; + this.configFile = configFile; + } + + public boolean equals(Object other) { + if (other == this) return true; + if (!(other instanceof CacheKey)) return false; + CacheKey otherKey = (CacheKey) other; + return Objects.equals(dfdlSchema, otherKey.dfdlSchema) && + Objects.equals(validationMode, otherKey.validationMode) && + Objects.equals(externalVariables, otherKey.externalVariables) && + Objects.equals(tunableMap, otherKey.tunableMap) && + Objects.equals(configFile, otherKey.configFile); + } + + public int hashCode() { + return Objects.hash(dfdlSchema, validationMode, externalVariables, tunableMap, configFile); + } + + } + + public static class DataProcessorSchemaPair { + private final DataProcessor dataProcessor; + private final RecordSchema schema; + + public DataProcessorSchemaPair(DataProcessor dataProcessor, RecordSchema schema) { + this.dataProcessor = dataProcessor; + this.schema = schema; + } + + public DataProcessor getDataProcessor() { + return dataProcessor; + } + + public RecordSchema getSchema() { + return schema; + } + } + + public static void buildCache(ComponentLog logger, ProcessContext context) { + final Integer cacheSize = context.getProperty(DaffodilResources.CACHE_SIZE).asInteger(); + final Long cacheTTL = context.getProperty(DaffodilResources.CACHE_TTL_AFTER_LAST_ACCESS).asTimePeriod(TimeUnit.SECONDS); + + // Rebuild the `cache` so that it loads DataProcessor/RecordSchema pairs on demand, where the key + // is a CacheKey based on all the possible configuration items + if (cacheSize > 0) { + CacheBuilder<Object, Object> cacheBuilder = CacheBuilder.newBuilder().maximumSize(cacheSize); + if (cacheTTL > 0) { + cacheBuilder.expireAfterAccess(cacheTTL, TimeUnit.SECONDS); + } + cache = cacheBuilder.build( + new CacheLoader<CacheKey, DataProcessorSchemaPair>() { + public DataProcessorSchemaPair load(CacheKey cacheKey) throws DaffodilCompileException { + return newDataProcessorSchemaPair( + logger, cacheKey.dfdlSchema, cacheKey.validationMode, cacheKey.externalVariables, + cacheKey.tunableMap, cacheKey.configFile + ); + } + } + ); + } else { + cache = null; + logger.warn("Daffodil data processor cache disabled because cache size is set to 0."); + } + } + +} diff --git a/nifi-daffodil-processors/src/main/resources/META-INF/services/org.apache.nifi.controller.ControllerService b/nifi-daffodil-processors/src/main/resources/META-INF/services/org.apache.nifi.controller.ControllerService new file mode 100644 index 0000000..f019074 --- /dev/null +++ 
b/nifi-daffodil-processors/src/main/resources/META-INF/services/org.apache.nifi.controller.ControllerService @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +com.tresys.nifi.controllers.DaffodilReader +com.tresys.nifi.controllers.DaffodilWriter diff --git a/nifi-daffodil-processors/src/main/scala/com/tresys/nifi/infoset/InfosetNodeInputter.scala b/nifi-daffodil-processors/src/main/scala/com/tresys/nifi/infoset/InfosetNodeInputter.scala new file mode 100644 index 0000000..a6d16cb --- /dev/null +++ b/nifi-daffodil-processors/src/main/scala/com/tresys/nifi/infoset/InfosetNodeInputter.scala @@ -0,0 +1,105 @@ +/* + * Copyright 2020 Nteligen, LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.tresys.nifi.infoset + +import java.util.{Iterator => JIterator} + +import com.tresys.nifi.util.DaffodilProcessingException +import org.apache.daffodil.dpath.NodeInfo +import org.apache.daffodil.infoset.InfosetInputterEventType +import org.apache.daffodil.japi.infoset.InfosetInputter +import org.apache.daffodil.util.{MStackOf, MaybeBoolean} +import org.apache.nifi.logging.ComponentLog + +/** + * This class is largely based off of the JDOMInfosetInputter, since the notion + * of a "Node" in a JDOM Document, as handled in that class, is general enough to be + * applied here for InfosetNodes + */ +class InfosetNodeInputter(val rootNode: InfosetNode, logger: ComponentLog) + extends InfosetInputter { + + private val nodeStack: MStackOf[(InfosetNode, JIterator[InfosetNode])] = { + val newStack = new MStackOf[(InfosetNode, JIterator[InfosetNode])] + val iterator = rootNode.iterator + if (!iterator.hasNext) { + throw new DaffodilProcessingException("Root InfosetNode does not contain a root element") + } + newStack.push((null, iterator)) + newStack + } + + var doStartEvent = true + + override def getEventType(): InfosetInputterEventType = { + import InfosetInputterEventType._ + if (nodeStack.top._1 == null) { + if (doStartEvent) StartDocument else EndDocument + } else { + if (doStartEvent) StartElement else EndElement + } + } + + private def nullableString(str: String): String = Option(str).fold("")(s => s) + + override def getLocalName(): String = nodeStack.top._1.getName + + override def getSimpleText(primType: NodeInfo.Kind): String = nullableString(nodeStack.top._1.getValue) + + override def isNilled(): MaybeBoolean = MaybeBoolean.Nope + + override def hasNext(): Boolean = !(nodeStack.top._1 == null && !doStartEvent) + + private def tryDescend(): Boolean = { + if (nodeStack.top._2.hasNext) { + val childNode: InfosetNode = nodeStack.top._2.next + nodeStack.push((childNode, childNode.iterator)) + true + } else false + } + + private def stackToString(): String = { + nodeStack.toList.asInstanceOf[List[(InfosetNode, JIterator[InfosetNode])]] + .map(pair => if (pair._1 == null) "" else nullableString(pair._1.getName)).mkString(", ") + } + + override def next(): Unit = { + if (hasNext()) { + if (tryDescend()) { + doStartEvent = true + } else { + if (doStartEvent) { + doStartEvent = false + } else { + nodeStack.pop + if (tryDescend()) { + doStartEvent = true + } + } + } + } + } + + override def getNamespaceURI(): String = null + + override val supportsNamespaces: Boolean = false + + override def fini(): Unit = { + nodeStack.clear() + } + +} diff --git a/nifi-daffodil-processors/src/main/scala/com/tresys/nifi/infoset/InfosetNodeOutputter.scala b/nifi-daffodil-processors/src/main/scala/com/tresys/nifi/infoset/InfosetNodeOutputter.scala new file mode 100644 index 0000000..b001f7a --- /dev/null +++ b/nifi-daffodil-processors/src/main/scala/com/tresys/nifi/infoset/InfosetNodeOutputter.scala @@ -0,0 +1,79 @@ +/* + * Copyright 2020 Nteligen, LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.tresys.nifi.infoset + +import com.tresys.nifi.util.DaffodilProcessingException +import org.apache.daffodil.infoset.{DIArray, DIComplex, DISimple} +import org.apache.daffodil.japi.infoset.InfosetOutputter +import org.apache.daffodil.util.MStackOf + +class InfosetNodeOutputter extends InfosetOutputter { + + private val nodeStack: MStackOf[InfosetNode] = new MStackOf[InfosetNode] + + def getResult: Option[InfosetNode] = Option(nodeStack.top) + + private def addNode(node: InfosetNode): Boolean = { + if (nodeStack.isEmpty) throw new DaffodilProcessingException("Tried to add to empty stack!") + nodeStack.top.addChild(node) + nodeStack.push(node) + true + } + + override def startSimple (diSimple: DISimple): Boolean = { + val newNode: InfosetNode + = new InfosetNode (diSimple.erd.name, false) + if (diSimple.hasValue) newNode.setValue (diSimple.dataValueAsString) + if (nodeStack.isEmpty) throw new DaffodilProcessingException("Tried to add to empty stack!") + nodeStack.top.addChild(newNode) + true + } + + override def endSimple (diSimple: DISimple): Boolean = true + + override def startComplex (diComplex: DIComplex): Boolean = { + addNode(new InfosetNode (diComplex.erd.name, false)) + } + + override def endComplex (diComplex: DIComplex): Boolean = { + nodeStack.pop + true + } + + override def startArray (diArray: DIArray): Boolean = { + addNode(new InfosetNode (diArray.erd.name, true)) + } + + override def endArray (diArray: DIArray): Boolean = { + nodeStack.pop + true + } + + override def reset(): Unit = nodeStack.clear + + override def startDocument: Boolean = { + nodeStack.push(new InfosetNode("root")) + true + } + + override def endDocument: Boolean = { + if (nodeStack.isEmpty) throw new DaffodilProcessingException("Stack should not be empty after parse!") + true + } + + override def toString: String = if (nodeStack.isEmpty) "" else nodeStack.top.toString +} diff --git a/nifi-daffodil-processors/src/main/scala/com/tresys/nifi/schema/RecordSchemaNode.scala b/nifi-daffodil-processors/src/main/scala/com/tresys/nifi/schema/RecordSchemaNode.scala new file mode 100644 index 0000000..6e4b3fa --- /dev/null +++ b/nifi-daffodil-processors/src/main/scala/com/tresys/nifi/schema/RecordSchemaNode.scala @@ -0,0 +1,132 @@ +/* + * Copyright 2020 Nteligen, LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.tresys.nifi.schema + +import java.util.{ Locale => JLocale } + +import org.apache.daffodil.dsom.walker._ +import org.apache.nifi.serialization.record.RecordFieldType + +/** + * A temporary Node representing a single item in the DSOM tree. These are generated + * as various nodes are encountered during the walk defined in RecordWalker. 
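+ * For example, a DFDL element such as <xs:element name="count" type="xs:unsignedInt"/> (a hypothetical snippet, not from the test schemas) walks into a simple-typed node whose simpleType is RecordFieldType.LONG, per the mapping below. 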
+ * @param dsomElement the original DSOM Term or ModelGroup from which attributes will be obtained + */ +class RecordSchemaNode(dsomElement: TermView) { + // If this Node represents a simple type, this is the exact simple type it is in terms of a Record + final val simpleType: Option[RecordFieldType] = dsomElement match { + case element: ElementDeclView => + if (element.isSimpleType) { + val recordFieldType: RecordFieldType = element.simpleType.primType match { + /** + * While I suspect all the "regular" primitives and the String type work, + * all that has been tested so far are UnsignedInts, which get turned into Longs, + * and Strings. The date/time-based types may or may not be compatible with NIFI's notion + * of date and time + */ + case _: DateTimeView => RecordFieldType.TIMESTAMP + case _: DateView => RecordFieldType.DATE + case _: TimeView => RecordFieldType.TIME + case _: BooleanView => RecordFieldType.BOOLEAN + case _: ByteView => RecordFieldType.BYTE + case _: UnsignedByteView | _: ShortView => RecordFieldType.SHORT + case _: UnsignedShortView | _: IntView => RecordFieldType.INT + case _: UnsignedIntView | _: LongView => RecordFieldType.LONG + case _: UnsignedLongView => RecordFieldType.BIGINT + case _: FloatView => RecordFieldType.FLOAT + case _: DecimalView | _: DoubleView => RecordFieldType.DOUBLE + case _: StringView | _: HexBinaryView | _: AnyURIView => RecordFieldType.STRING + case _ => throw new IllegalArgumentException(s"unsupported type for element $element") + } + Some(recordFieldType) + } else None + case _ => None + } + + /* This is going to become the name of a RecordField. + * Choices should be the only SchemaNodes that don't have names after postProcessing, + * except for a few wrapper Records that are necessary in some cases + */ + final val name: Option[String] = dsomElement match { + case elementBase: ElementBaseView => Some(elementBase.name) + case _ => None + } + + final val isSimple: Boolean = name.isDefined && simpleType.isDefined + + /** + * Represents if this SchemaNode should be treated as an OptionalRecordField + */ + final val isOptional: Boolean = dsomElement match { + case element: ElementBaseView => element.isOptional + case _ => false + } + + final val recordType: RecordFieldType = dsomElement match { + case _: ChoiceView => RecordFieldType.CHOICE + // Sequences aren't array types; they are treated like Records + case _: SequenceView => RecordFieldType.RECORD + /** + * If an element is a simple type, that simple type just becomes its record type. + */ + case element: ElementBaseView => + if (element.isArray) { + RecordFieldType.ARRAY + } else if (isSimple) { + simpleType.get + } else RecordFieldType.RECORD + // probably isn't the *best* solution, but this is a surefire way to ensure nothing other than the above + // types are passed to the constructor of this class. + case _ => throw new IllegalArgumentException(s"Unsupported type ${dsomElement.getClass}") + } + + final val namespace: String = dsomElement.namespaces.uri + + /** + * These represent any type of embedded DFDL element that is a sub-element of this SchemaNode + * Once postProcessing is done, this should mostly only be items of an Array, the options for + * a Choice, or the list of fields for a Record + */ + var children: List[RecordSchemaNode] = List() + + def addChild(newChild: RecordSchemaNode): Unit = { + children = children :+ newChild + } + + /** + * This is all used to get nice, printable representations of the SchemaNode tree before + * it is all converted to Records. 
Generally just used for debugging purposes; + * namely, to see if the RecordSchema matches the original SchemaNode tree + */ + private final val nameAttr: String = + if (name.isDefined && simpleType.isDefined) { + s" ${name.get}='${simpleType.get}'" + } else if (name.isDefined) s" name='${name.get}'" else "" + private final val optionalAttr: String = if (isOptional) " optional" else "" + private final val startingStr + = s"<${recordType.toString.toLowerCase(JLocale.ROOT)}$nameAttr$optionalAttr {$namespace}>" + private final val endingStr = s"</${recordType.toString.toLowerCase(JLocale.ROOT)}>" + + override def toString: String = toString(this, "") + private def toString(node: RecordSchemaNode, tabs: String): String = { + tabs + node.startingStr + { + for { + child <- node.children + } yield "\n" + toString(child, tabs + "\t") + }.mkString("") + {if (node.children.nonEmpty) "\n" + tabs else ""} + node.endingStr + } +} diff --git a/nifi-daffodil-processors/src/main/scala/com/tresys/nifi/schema/RecordUtil.scala b/nifi-daffodil-processors/src/main/scala/com/tresys/nifi/schema/RecordUtil.scala new file mode 100644 index 0000000..cb2f277 --- /dev/null +++ b/nifi-daffodil-processors/src/main/scala/com/tresys/nifi/schema/RecordUtil.scala @@ -0,0 +1,113 @@ +/* + * Copyright 2020 Nteligen, LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.tresys.nifi.schema + +import java.util.{ List => JList } + +import org.apache.nifi.logging.ComponentLog +import org.apache.nifi.serialization.record.`type`.{ArrayDataType, ChoiceDataType, RecordDataType} +import org.apache.nifi.serialization.record.{DataType, Record, RecordField, RecordSchema} + +import scala.collection.JavaConverters + +class RecordUtil + +object RecordUtil { + + val PRODUCTION_MODE: Boolean = true + + def log(logger: ComponentLog, message: String, args: Array[Object]): Unit = { + if (!PRODUCTION_MODE) { + logger.error(message, args) + } + } + + def log(logger: ComponentLog, message: String): Unit = { + log(logger, message, new Array[Object](0)) + } + + def printRecord(recordObject: Object, tabs: String): String = { + recordObject match { + case record: Record => + val builder: StringBuilder = new StringBuilder(tabs) + builder.append("Record {\n") + builder.append( + JavaConverters.asScalaBuffer(record.getSchema.getFields) + .filter(field => record.getValue(field.getFieldName) != null) + .map( recordField => { + tabs + "\t" + recordField.getFieldName + ": " + + printRecord(record.getValue(recordField.getFieldName), tabs + "\t").substring(tabs.length + 1) + } + ).mkString(s",\n") + ) + builder.append(s"\n$tabs}") + builder.toString + case arr: Array[Object] => + val builder: StringBuilder = new StringBuilder(tabs) + builder.append("ARRAY [\n") + builder.append( + arr.map(child => tabs + printRecord(child, tabs + "\t").substring(tabs.length)) + .mkString(s",\n") + ) + builder.append(s"\n$tabs]") + builder.toString + case _ => tabs + recordObject.toString + } + } + + def printRecordSchema(schema: RecordSchema): Unit = { + println(printRecordSchemaHelper(schema, "")) + } + + def getRecordPrefix(record: RecordSchema): String = s"Record {${record.getSchemaNamespace.orElse("")}}" + + def printRecordSchemaHelper(someObject: Object, tabs: String): String = { + someObject match { + case record: RecordSchema => + tabs + getRecordPrefix(record) + " {\n" + + recordListAsString(record.getFields, tabs + "\t") + "\n" + tabs + "}" + case recordType: RecordDataType => printRecordSchemaHelper(recordType.getChildSchema, tabs) + case choiceDataType: ChoiceDataType => + tabs + "Choice [\n" + + dataTypeListAsString(choiceDataType.getPossibleSubTypes, tabs + "\t") + "\n" + tabs + "]" + case arrayDataType: ArrayDataType => + tabs + "ARRAY [\n" + printRecordSchemaHelper(arrayDataType.getElementType, tabs + "\t") + + "\n" + tabs + "]" + case dataType: DataType => tabs + dataType.toString + case _ => throw new IllegalArgumentException(s"Unsupported type ${someObject.getClass}") + } + } + + private def recordFieldPrefix(field: RecordField): String = field match { + case _: OptionalRecordField => "(Optional)" + field.getFieldName + case _ => field.getFieldName + } + + def recordListAsString(javaList: JList[RecordField], tabs: String): String = { + JavaConverters.asScalaBuffer(javaList).map( + recordField => + tabs + recordFieldPrefix(recordField) + ": " + + printRecordSchemaHelper(recordField.getDataType, tabs).substring(tabs.length) + ).mkString(",\n") + } + + def dataTypeListAsString(javaList: JList[DataType], tabs: String): String = { + JavaConverters.asScalaBuffer(javaList) + .map(subType => tabs + printRecordSchemaHelper(subType, tabs).substring(tabs.length)) + .mkString(",\n") + } +} diff --git a/nifi-daffodil-processors/src/main/scala/com/tresys/nifi/schema/RecordWalker.scala b/nifi-daffodil-processors/src/main/scala/com/tresys/nifi/schema/RecordWalker.scala new file mode 
100644 index 0000000..f2fedc0 --- /dev/null +++ b/nifi-daffodil-processors/src/main/scala/com/tresys/nifi/schema/RecordWalker.scala @@ -0,0 +1,225 @@ +/* + * Copyright 2020 Nteligen, LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.tresys.nifi.schema + +import org.apache.daffodil.dsom.walker._ +import org.apache.nifi.serialization.SimpleRecordSchema +import org.apache.nifi.serialization.record.`type`.{ ArrayDataType, ChoiceDataType, RecordDataType } +import org.apache.nifi.serialization.record.{ DataType, RecordField, RecordFieldType, RecordSchema } +import scala.collection.JavaConverters._ + +/** + * Direct subclass of the NIFI RecordField. NIFI doesn't have "Optional" fields, so + * eventually this will either be removed from the Schema if an Infoset doesn't have the field, + * or it will become a regular RecordField if it does. + * @param recordField an already existing RecordField from which the Name and DataType will be obtained + */ +class OptionalRecordField(recordField: RecordField) + extends RecordField(recordField.getFieldName, recordField.getDataType) { + override def equals(obj: Any): Boolean = + obj match { + case _: OptionalRecordField => super.equals(obj) + case _ => false + } + override def hashCode(): Int = 31 * super.hashCode() + 1 + override def toString: String = "Optional" + super.toString +} + +/** + * Concrete implementation of the AbstractDSOMWalker abstract class. + * This class produces a NIFI RecordSchema that is intended to match the original DFDL file. + * + * The RecordSchema is built in 3 primary stages: + * 1) A tree of SchemaNodes is created as the DFDL file is walked; this walk is performed + * through the various event handlers defined in the parent abstract class. + * 2) The tree of SchemaNodes undergoes some post-processing, mainly to remove redundant Record wrappers. + * 3) The tree of SchemaNodes is converted into a RecordSchema; it is walked recursively within this class. + */ +class RecordWalker extends AbstractDSOMWalker { + + // this is the critical data structure for managing the temporary SchemaNodes that are created + // when the Schema is initially walked. This will then be converted to the actual RecordSchema. + private var objectStack: List[RecordSchemaNode] = List() + + lazy val result: RecordSchema = { + if (objectStack.isEmpty) null else schemaNodeToRecordType(objectStack.head).getChildSchema + } + + lazy val stringRep: String = if (result != null) result.toString else "" + + override def onTermBegin(termElement: TermView): Unit = { + termElement match { + case _: SequenceView | _: ChoiceView | _: ElementBaseView => + val newNode: RecordSchemaNode = new RecordSchemaNode(termElement) + // we need to add the new node as a new child of whatever is currently at the top of the stack + objectStack.head.addChild(newNode) + // however, we also add the node to the stack itself! We need to be able to add children to it + // if it is, say, another record or array. 
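+ // (illustrative trace: for a sequence containing a single element, the events arrive as + // onTermBegin(sequence), onTermBegin(element), onTermEnd(element), onTermEnd(sequence), + // so the head of objectStack always mirrors the innermost term currently being built) 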
+ objectStack = newNode +: objectStack + case _ => + } + } + + override def onTermEnd(termElement: TermView): Unit = { + termElement match { + case _: SequenceView | _: ChoiceView | _: ElementBaseView => objectStack = objectStack.tail + case _ => + } + } + + override def onTypeBegin(typeElement: TypeView): Unit = {} + + override def onTypeEnd(typeElement: TypeView): Unit = {} + + override def onWalkBegin(root: RootView): Unit = { + objectStack = List(new RecordSchemaNode(root)) + } + + /** + * Perform postProcessing; this happens *after* the SchemaNode tree is created but *before* that tree + * gets converted to a RecordSchema + */ + private def postProcessing(): Unit = { + removeExtraRecords(objectStack.head) + } + + /** + * Recursively replace any SchemaNodes that are of type record and do not have + * a name attribute with their children. These usually represent unnecessary wrapper nodes or empty + * records with no elements. + * + * Given a SchemaNode, any child considered "extra" is replaced with its + * own child list (which may be empty, in which case the child is simply removed), until there are no more + * "extra" children. + * + * The loop is necessary because sometimes we can have towers of extra Nodes that would + * never get resolved if we just took care of 1 or 2 layers; all must be dealt with at once. + * @param schemaNode the current Node undergoing the algorithm described above + */ + private def removeExtraRecords(schemaNode: RecordSchemaNode): Unit = { + while (schemaNode.children.exists(child => isExtraRecord(schemaNode, child))) { + schemaNode.children = schemaNode.children.flatMap( + child => if (isExtraRecord(schemaNode, child)) child.children else List(child) + ) + } + // call this helper method on each of this Node's children. + schemaNode.children.foreach(removeExtraRecords) + } + + /** + * Determines if a Record is "extra"; that is, if it should be replaced with + * its list of children SchemaNodes within whatever parent SchemaNode it's a part of + * @param parentNode the parent node whose child list is being examined + * @param childNode the node to be considered + * @return whether or not this Record is "extra" according to the situations below + */ + private def isExtraRecord(parentNode: RecordSchemaNode, childNode: RecordSchemaNode): Boolean = { + // any no-name nodes with no children are immediately removed + (childNode.name.isEmpty && childNode.children.isEmpty) || { + parentNode.recordType match { + case RecordFieldType.RECORD | RecordFieldType.ARRAY => + childNode.recordType match { + // This removes extra wrapper records around children of records or arrays + // usually used to remove things like the DFDL complexType, simpleType elements + case RecordFieldType.RECORD => childNode.name.isEmpty + case _ => false + } + // Currently, all double choices are removed. This was mainly done to make GroupRefs work + // for the JPEG Schema, but may not be the correct approach for all cases. + case RecordFieldType.CHOICE => + childNode.recordType match { + case RecordFieldType.CHOICE => true + case _ => false + } + case _ => false + } + } + + } + + override def onWalkEnd(root: RootView): Unit = { + // After the walk is over, we perform postProcessing and then convert the SchemaNode tree + // into a RecordSchema. Also, if we are in dev. 
+ if (!RecordUtil.PRODUCTION_MODE) println(objectStack.head) + postProcessing() + if (!RecordUtil.PRODUCTION_MODE) println(objectStack.head) + } + + /** + * Helper method to convert a SchemaNode intended to be a Record into a NIFI RecordSchema, + * and then create a NIFI Record Data Type from this Schema + * @param node the node from which the name and the namespace of the Schema will be obtained + * @param children the List of child nodes that will become the List of Fields in the RecordSchema + * @return a RecordDataType containing the generated RecordSchema + */ + private def schemaNodeToRecordType(node: RecordSchemaNode, + children: List[RecordSchemaNode]): RecordDataType = { + val newSchema: SimpleRecordSchema = new SimpleRecordSchema(children.map(nodeToField).asJava) + newSchema.setSchemaName(node.name.getOrElse("")) + newSchema.setSchemaNamespace(node.namespace) + new RecordDataType(newSchema) + } + + private def schemaNodeToRecordType(node: RecordSchemaNode): RecordDataType + = schemaNodeToRecordType(node, node.children) + + private def nodeToField(schemaNode: RecordSchemaNode): RecordField = { + // by default, if this node doesn't have a name, its data type is used as the field name. + // This should only ever be the case for anonymous choices. + val recordField: RecordField = new RecordField( + schemaNode.name.getOrElse(schemaNode.recordType.getDataType.toString), + schemaNodeToDataType(schemaNode) + ) + if (schemaNode.isOptional) new OptionalRecordField(recordField) else recordField + } + + /** + * Helper method to convert a SchemaNode known to be a choice into a NIFI Choice data type. + * This handles DFDL schemas in which named elements are either direct sub-members of + * DFDL choices or embedded in another element (which corresponds to being in a NIFI Record). + * In the end, if they are not already inside a NIFI Record, then they are put there. NIFI Choices cannot + * have fields, only possible sub-types, so anything that would be a "field" gets a wrapper NIFI Record + * put around it. + * @param schemaNode the node to convert to a NIFI Choice data type + * @return a NIFI Choice data type as described above + */ + private def choiceNodeToChoiceSchema(schemaNode: RecordSchemaNode): DataType = { + val childList: List[DataType] = schemaNode.children.map( + child => if (child.name.isEmpty) schemaNodeToRecordType(child) + else schemaNodeToRecordType(child, List(child)) + ) + new ChoiceDataType(childList.asJava) + } + + /** + * Local helper method to convert a SchemaNode into the appropriate NIFI + * Data Type. Records and Choices get routed to the helper methods above, and Arrays are handled directly in this method.
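+ * For Arrays, the element type is the node's simple Data Type when the node is simple, or a nested Record type otherwise.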
+ * @param schemaNode the node to convert to a NIFI Data Type + * @return the finalized NIFI Data Type + */ + private def schemaNodeToDataType(schemaNode: RecordSchemaNode): DataType = schemaNode.recordType match { + case RecordFieldType.ARRAY => + new ArrayDataType( + if (schemaNode.isSimple) schemaNode.simpleType.get.getDataType + else schemaNodeToRecordType(schemaNode) + ) + case RecordFieldType.RECORD => schemaNodeToRecordType(schemaNode) + case RecordFieldType.CHOICE => choiceNodeToChoiceSchema(schemaNode) + case recordType => recordType.getDataType + } + +} diff --git a/nifi-daffodil-processors/src/test/java/com/tresys/nifi/controllers/TestDaffodilControllers.java b/nifi-daffodil-processors/src/test/java/com/tresys/nifi/controllers/TestDaffodilControllers.java new file mode 100644 index 0000000..7f9f78d --- /dev/null +++ b/nifi-daffodil-processors/src/test/java/com/tresys/nifi/controllers/TestDaffodilControllers.java @@ -0,0 +1,470 @@ +/* + * Copyright 2020 Tresys, LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.tresys.nifi.controllers; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.util.Map; +import java.util.HashMap; + +import com.tresys.nifi.util.DaffodilResources; +import org.apache.nifi.controller.ControllerService; +import org.apache.nifi.json.JsonRecordSetWriter; +import org.apache.nifi.processors.standard.ConvertRecord; +import org.apache.nifi.reporting.InitializationException; +import org.apache.nifi.json.JsonTreeReader; +import org.apache.nifi.util.MockFlowFile; +import org.apache.nifi.util.TestRunner; +import org.apache.nifi.util.TestRunners; +import org.apache.nifi.xml.XMLReader; +import org.apache.nifi.xml.XMLRecordSetWriter; + +import org.junit.Test; + +public class TestDaffodilControllers { + + @Test + public void testDFDLSchemaNotFound() throws InitializationException { + final TestRunner testRunner = TestRunners.newTestRunner(ConvertRecord.class); + ControllerService daffodilReader = new DaffodilReader(); + testRunner.addControllerService("daffodilReader", daffodilReader); + testRunner.setProperty(daffodilReader, DaffodilResources.DFDL_SCHEMA_FILE.getName(), "/does/not/exist.dfdl.xsd"); + ControllerService daffodilWriter = new DaffodilWriter(); + testRunner.addControllerService("daffodilWriter", daffodilWriter); + testRunner.setProperty(daffodilWriter, DaffodilResources.DFDL_SCHEMA_FILE.getName(), "/does/not/exist.dfdl.xsd"); + testRunner.setProperty("record-reader", "daffodilReader"); + testRunner.setProperty("record-writer", "daffodilWriter"); + testRunner.assertNotValid(); + } + + private enum ControllerOption { + XML_READER, + XML_WRITER, + JSON_READER, + JSON_WRITER + } + + private TestRunner setupRunner(ControllerOption option, boolean expressionLanguage, boolean safeXMLSetup, + Map<String, String> propertyMapping) throws InitializationException { + TestRunner testRunner = TestRunners.newTestRunner(ConvertRecord.class); + switch (option) { + case XML_READER: {
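+ // unparse direction: the XMLReader reads an XML infoset into Records, and the DaffodilWriter unparses them back to the native format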
ControllerService xmlReader = new XMLReader(); + testRunner.addControllerService("xmlReader", xmlReader); + if (safeXMLSetup) { + testRunner.setProperty(xmlReader, XMLReader.RECORD_FORMAT, XMLReader.RECORD_ARRAY); + } + testRunner.enableControllerService(xmlReader); + testRunner.setProperty("record-reader", "xmlReader"); + ControllerService daffodilWriter = new DaffodilWriter(); + testRunner.addControllerService("daffodilWriter", daffodilWriter); + propertyMapping.forEach((name, value) -> testRunner.setProperty(daffodilWriter, name, value)); + testRunner.enableControllerService(daffodilWriter); + testRunner.setProperty("record-writer", "daffodilWriter"); + break; + } + case JSON_READER: { + ControllerService jsonReader = new JsonTreeReader(); + testRunner.addControllerService("jsonReader", jsonReader); + testRunner.enableControllerService(jsonReader); + testRunner.setProperty("record-reader", "jsonReader"); + ControllerService daffodilWriter = new DaffodilWriter(); + testRunner.addControllerService("daffodilWriter", daffodilWriter); + propertyMapping.forEach((name, value) -> testRunner.setProperty(daffodilWriter, name, value)); + testRunner.enableControllerService(daffodilWriter); + testRunner.setProperty("record-writer", "daffodilWriter"); + break; + } + case XML_WRITER: { + ControllerService xmlWriter = new XMLRecordSetWriter(); + testRunner.addControllerService("xmlWriter", xmlWriter); + if (safeXMLSetup) { + testRunner.setProperty(xmlWriter, XMLRecordSetWriter.ROOT_TAG_NAME, "Record"); + } + testRunner.enableControllerService(xmlWriter); + testRunner.setProperty("record-writer", "xmlWriter"); + ControllerService daffodilReader = new DaffodilReader(); + testRunner.addControllerService("daffodilReader", daffodilReader); + propertyMapping.forEach((descriptor, value) -> testRunner.setProperty(daffodilReader, descriptor, value)); + testRunner.enableControllerService(daffodilReader); + testRunner.setProperty("record-reader", "daffodilReader"); + break; + } + default: + ControllerService jsonWriter = new JsonRecordSetWriter(); + testRunner.addControllerService("jsonWriter", jsonWriter); + testRunner.enableControllerService(jsonWriter); + testRunner.setProperty("record-writer", "jsonWriter"); + ControllerService daffodilReader = new DaffodilReader(); + testRunner.addControllerService("daffodilReader", daffodilReader); + propertyMapping.forEach((descriptor, value) -> testRunner.setProperty(daffodilReader, descriptor, value)); + testRunner.enableControllerService(daffodilReader); + testRunner.setProperty("record-reader", "daffodilReader"); + } + testRunner.setValidateExpressionUsage(expressionLanguage); + return testRunner; + } + + private TestRunner setupRunner(ControllerOption option, boolean expressionLanguage, + Map<String, String> propertyMapping) throws InitializationException { + return setupRunner(option, expressionLanguage, false, propertyMapping); + } + + @Test + public void testDFDLSchemaNotValid() throws IOException, InitializationException { + HashMap<String, String> valueMap = new HashMap<>(); + valueMap.put(DaffodilResources.DFDL_SCHEMA_FILE.getName(), "src/test/resources/TestDaffodilComponents/csv-invalid.dfdl.xsd"); + final TestRunner testRunner = setupRunner(ControllerOption.XML_WRITER, false, valueMap); + testRunner.enqueue(Paths.get("src/test/resources/TestDaffodilComponents/tokens.csv")); + testRunner.run(); + testRunner.assertAllFlowFilesTransferred("failure"); + final MockFlowFile original = testRunner.getFlowFilesForRelationship("failure").get(0); + final String expectedContent = new
String(Files.readAllBytes(Paths.get("src/test/resources/TestDaffodilComponents/tokens.csv"))); + original.assertContentEquals(expectedContent); + } + + @Test + public void testParseCSV() throws IOException, InitializationException { + HashMap valueMap = new HashMap<>(); + valueMap.put(DaffodilResources.DFDL_SCHEMA_FILE.getName(), "src/test/resources/TestDaffodilComponents/csv.dfdl.xsd"); + final TestRunner testRunner = setupRunner(ControllerOption.XML_WRITER, false, valueMap); + testRunner.enqueue(Paths.get("src/test/resources/TestDaffodilComponents/tokens.csv")); + testRunner.run(); + testRunner.assertAllFlowFilesTransferred("success"); + final MockFlowFile infoset = testRunner.getFlowFilesForRelationship("success").get(0); + final String expectedContent = new String(Files.readAllBytes(Paths.get("src/test/resources/TestDaffodilControllers/tokens.csv.xml"))); + infoset.assertContentEquals(expectedContent); + } + + @Test + public void testParseCSVFail() throws IOException, InitializationException { + HashMap valueMap = new HashMap<>(); + valueMap.put(DaffodilResources.DFDL_SCHEMA_FILE.getName(), "src/test/resources/TestDaffodilComponents/csv.dfdl.xsd"); + final TestRunner testRunner = setupRunner(ControllerOption.XML_WRITER, false, valueMap); + // trying to parse the XML file will fail, expects CSV data + testRunner.enqueue(Paths.get("src/test/resources/TestDaffodilControllers/tokens.csv.xml")); + testRunner.run(); + testRunner.assertAllFlowFilesTransferred("failure"); + final MockFlowFile infoset = testRunner.getFlowFilesForRelationship("failure").get(0); + final String expectedContent = new String(Files.readAllBytes(Paths.get("src/test/resources/TestDaffodilControllers/tokens.csv.xml"))); + infoset.assertContentEquals(expectedContent); + } + + @Test + public void testUnparseCSV() throws IOException, InitializationException { + HashMap valueMap = new HashMap<>(); + valueMap.put(DaffodilResources.DFDL_SCHEMA_FILE.getName(), "src/test/resources/TestDaffodilComponents/csv.dfdl.xsd"); + final TestRunner testRunner = setupRunner(ControllerOption.XML_READER, false, valueMap); + testRunner.enqueue(Paths.get("src/test/resources/TestDaffodilControllers/tokens.csv.xml")); + testRunner.run(); + testRunner.assertAllFlowFilesTransferred("success"); + final MockFlowFile infoset = testRunner.getFlowFilesForRelationship("success").get(0); + final String expectedContent = new String(Files.readAllBytes(Paths.get("src/test/resources/TestDaffodilComponents/tokens.csv"))); + infoset.assertContentEquals(expectedContent.replace("\r\n", "\n")); + } + + @Test + public void testUnparseCSVFail() throws IOException, InitializationException { + HashMap valueMap = new HashMap<>(); + valueMap.put(DaffodilResources.DFDL_SCHEMA_FILE.getName(), "src/test/resources/TestDaffodilComponents/csv.dfdl.xsd"); + final TestRunner testRunner = setupRunner(ControllerOption.XML_READER, false, valueMap); + // trying to unparse CSV will fail, expects an XML infoset + testRunner.enqueue(Paths.get("src/test/resources/TestDaffodilComponents/tokens.csv")); + testRunner.run(); + testRunner.assertAllFlowFilesTransferred("failure"); + final MockFlowFile infoset = testRunner.getFlowFilesForRelationship("failure").get(0); + final String expectedContent = new String(Files.readAllBytes(Paths.get("src/test/resources/TestDaffodilComponents/tokens.csv"))); + infoset.assertContentEquals(expectedContent); + } + + @Test + public void testExpressionLanguage() throws IOException, InitializationException { + HashMap valueMap = new HashMap<>(); + 
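// the "${dfdl.schema}" value below is resolved against each FlowFile's attributes via Expression Language +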
valueMap.put(DaffodilResources.DFDL_SCHEMA_FILE.getName(), "${dfdl.schema}"); + final TestRunner testRunner = setupRunner(ControllerOption.XML_WRITER, true, valueMap); + final Map attributes = new HashMap<>(); + attributes.put("dfdl.schema", "src/test/resources/TestDaffodilComponents/csv.dfdl.xsd"); + testRunner.enqueue(Paths.get("src/test/resources/TestDaffodilComponents/tokens.csv"), attributes); + testRunner.run(); + testRunner.assertAllFlowFilesTransferred("success"); + final MockFlowFile infoset = testRunner.getFlowFilesForRelationship("success").get(0); + final String expectedContent = new String(Files.readAllBytes(Paths.get("src/test/resources/TestDaffodilControllers/tokens.csv.xml"))); + infoset.assertContentEquals(expectedContent); + } + + @Test + public void testNoCache() throws IOException, InitializationException { + HashMap valueMap = new HashMap<>(); + valueMap.put(DaffodilResources.DFDL_SCHEMA_FILE.getName(), "src/test/resources/TestDaffodilComponents/csv.dfdl.xsd"); + valueMap.put(DaffodilResources.CACHE_SIZE.getName(), "0"); + final TestRunner testRunner = setupRunner(ControllerOption.XML_WRITER, false, valueMap); + testRunner.enqueue(Paths.get("src/test/resources/TestDaffodilComponents/tokens.csv")); + testRunner.run(); + testRunner.assertAllFlowFilesTransferred("success"); + final MockFlowFile infoset = testRunner.getFlowFilesForRelationship("success").get(0); + final String expectedContent = new String(Files.readAllBytes(Paths.get("src/test/resources/TestDaffodilControllers/tokens.csv.xml"))); + infoset.assertContentEquals(expectedContent); + } + + @Test + public void testParseCSVJson() throws IOException, InitializationException { + HashMap valueMap = new HashMap<>(); + valueMap.put(DaffodilResources.DFDL_SCHEMA_FILE.getName(), "src/test/resources/TestDaffodilComponents/csv.dfdl.xsd"); + final TestRunner testRunner = setupRunner(ControllerOption.JSON_WRITER, false, valueMap); + testRunner.enqueue(Paths.get("src/test/resources/TestDaffodilComponents/tokens.csv")); + testRunner.run(); + testRunner.assertAllFlowFilesTransferred("success"); + final MockFlowFile infoset = testRunner.getFlowFilesForRelationship("success").get(0); + final String expectedContent = new String(Files.readAllBytes(Paths.get("src/test/resources/TestDaffodilControllers/tokens.csv.json"))); + infoset.assertContentEquals(expectedContent); + } + + @Test + public void testUnparseCSVJson() throws IOException, InitializationException { + HashMap valueMap = new HashMap<>(); + valueMap.put(DaffodilResources.DFDL_SCHEMA_FILE.getName(), "src/test/resources/TestDaffodilComponents/csv.dfdl.xsd"); + final TestRunner testRunner = setupRunner(ControllerOption.JSON_READER, false, valueMap); + testRunner.enqueue(Paths.get("src/test/resources/TestDaffodilControllers/tokens.csv.json")); + testRunner.run(); + testRunner.assertAllFlowFilesTransferred("success"); + final MockFlowFile infoset = testRunner.getFlowFilesForRelationship("success").get(0); + final String expectedContent = new String(Files.readAllBytes(Paths.get("src/test/resources/TestDaffodilComponents/tokens.csv"))); + infoset.assertContentEquals(expectedContent.replace("\r\n", "\n")); + } + + @Test + public void testUnparseCSVAttributeJSON() throws IOException, InitializationException { + HashMap valueMap = new HashMap<>(); + valueMap.put(DaffodilResources.DFDL_SCHEMA_FILE.getName(), "src/test/resources/TestDaffodilComponents/csv.dfdl.xsd"); + final TestRunner testRunner = setupRunner(ControllerOption.JSON_READER, false, valueMap); + 
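// no mime.type attribute is needed here; the configured JSON reader determines the infoset form +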
testRunner.enqueue(Paths.get("src/test/resources/TestDaffodilControllers/tokens.csv.json"), new HashMap<>()); + testRunner.run(); + testRunner.assertAllFlowFilesTransferred("success"); + final MockFlowFile csvFile = testRunner.getFlowFilesForRelationship("success").get(0); + final String expectedContent = new String(Files.readAllBytes(Paths.get("src/test/resources/TestDaffodilComponents/tokens.csv"))); + csvFile.assertContentEquals(expectedContent.replace("\r\n", "\n")); + } + + @Test + public void testUnparseCSVAttributeXML() throws IOException, InitializationException { + HashMap valueMap = new HashMap<>(); + valueMap.put(DaffodilResources.DFDL_SCHEMA_FILE.getName(), "src/test/resources/TestDaffodilComponents/csv.dfdl.xsd"); + final TestRunner testRunner = setupRunner(ControllerOption.XML_READER, false, valueMap); + testRunner.enqueue(Paths.get("src/test/resources/TestDaffodilControllers/tokens.csv.xml")); + testRunner.run(); + testRunner.assertAllFlowFilesTransferred("success"); + final MockFlowFile infoset = testRunner.getFlowFilesForRelationship("success").get(0); + final String expectedContent = new String(Files.readAllBytes(Paths.get("src/test/resources/TestDaffodilComponents/tokens.csv"))); + infoset.assertContentEquals(expectedContent.replace("\r\n", "\n")); + } + + @Test + public void testParseLeftOverData() throws IOException, InitializationException { + HashMap valueMap = new HashMap<>(); + valueMap.put(DaffodilResources.DFDL_SCHEMA_FILE.getName(), "src/test/resources/TestDaffodilComponents/bitlength.dfdl.xsd"); + final TestRunner testRunner = setupRunner(ControllerOption.XML_WRITER, false, valueMap); + testRunner.enqueue(Paths.get("src/test/resources/TestDaffodilComponents/leftover.bin")); + testRunner.run(); + testRunner.assertAllFlowFilesTransferred("failure"); + final MockFlowFile original = testRunner.getFlowFilesForRelationship("failure").get(0); + final byte[] expectedContent = Files.readAllBytes(Paths.get("src/test/resources/TestDaffodilComponents/leftover.bin")); + original.assertContentEquals(expectedContent); + } + + @Test + public void testParseNoLeftOverData() throws IOException, InitializationException { + HashMap valueMap = new HashMap<>(); + valueMap.put(DaffodilResources.DFDL_SCHEMA_FILE.getName(), "src/test/resources/TestDaffodilComponents/bitlength.dfdl.xsd"); + final TestRunner testRunner = setupRunner(ControllerOption.XML_WRITER, false, valueMap); + testRunner.enqueue(Paths.get("src/test/resources/TestDaffodilComponents/noleftover.bin")); + testRunner.run(); + testRunner.assertAllFlowFilesTransferred("success"); + final MockFlowFile infoset = testRunner.getFlowFilesForRelationship("success").get(0); + final String expectedContent = new String(Files.readAllBytes(Paths.get("src/test/resources/TestDaffodilControllers/noleftover.bin.xml"))); + infoset.assertContentEquals(expectedContent); + } + + @Test + public void testParseCSVValidationLimited() throws IOException, InitializationException { + HashMap valueMap = new HashMap<>(); + valueMap.put(DaffodilResources.DFDL_SCHEMA_FILE.getName(), "src/test/resources/TestDaffodilComponents/csv.dfdl.xsd"); + valueMap.put(DaffodilResources.VALIDATION_MODE.getName(), DaffodilResources.LIMITED_VALUE); + final TestRunner testRunner = setupRunner(ControllerOption.XML_WRITER, false, valueMap); + testRunner.enqueue(Paths.get("src/test/resources/TestDaffodilComponents/tokens.csv")); + testRunner.run(); + testRunner.assertAllFlowFilesTransferred("failure"); + final MockFlowFile infoset = 
testRunner.getFlowFilesForRelationship("failure").get(0); + final String expectedContent = new String(Files.readAllBytes(Paths.get("src/test/resources/TestDaffodilComponents/tokens.csv"))); + infoset.assertContentEquals(expectedContent); + } + + @Test + public void testParseCSVValidationFull() throws IOException, InitializationException { + HashMap valueMap = new HashMap<>(); + valueMap.put(DaffodilResources.DFDL_SCHEMA_FILE.getName(), "src/test/resources/TestDaffodilComponents/csv.dfdl.xsd"); + valueMap.put(DaffodilResources.VALIDATION_MODE.getName(), DaffodilResources.FULL_VALUE); + final TestRunner testRunner = setupRunner(ControllerOption.XML_WRITER, false, valueMap); + testRunner.enqueue(Paths.get("src/test/resources/TestDaffodilComponents/tokens.csv")); + testRunner.run(); + testRunner.assertAllFlowFilesTransferred("failure"); + final MockFlowFile infoset = testRunner.getFlowFilesForRelationship("failure").get(0); + final String expectedContent = new String(Files.readAllBytes(Paths.get("src/test/resources/TestDaffodilComponents/tokens.csv"))); + infoset.assertContentEquals(expectedContent); + } + + @Test + public void testTunableParameters() throws InitializationException { + HashMap valueMap = new HashMap<>(); + valueMap.put(DaffodilResources.DFDL_SCHEMA_FILE.getName(), "src/test/resources/TestDaffodilComponents/digitList.dfdl.xsd"); + valueMap.put("+maxOccursBounds", "4"); + final TestRunner testRunner = setupRunner(ControllerOption.XML_WRITER, false, valueMap); + testRunner.enqueue("12345"); + testRunner.run(); + testRunner.assertAllFlowFilesTransferred("failure"); + final MockFlowFile infoset = testRunner.getFlowFilesForRelationship("failure").get(0); + infoset.assertContentEquals("12345"); + } + + @Test + public void testExternalVariables() throws IOException, InitializationException { + HashMap valueMap = new HashMap<>(); + valueMap.put(DaffodilResources.DFDL_SCHEMA_FILE.getName(), "src/test/resources/TestDaffodilComponents/digitList.dfdl.xsd"); + valueMap.put("valueToOverride", "1"); + final TestRunner testRunner = setupRunner(ControllerOption.JSON_WRITER, false, valueMap); + testRunner.enqueue("12345"); + testRunner.run(); + testRunner.assertAllFlowFilesTransferred("success"); + final MockFlowFile infoset = testRunner.getFlowFilesForRelationship("success").get(0); + String expectedContent = new String(Files.readAllBytes(Paths.get("src/test/resources/TestDaffodilControllers/simpleList-extVars.txt.json"))); + expectedContent = expectedContent.replaceAll("\"\\$expectedValue\"", "1"); + infoset.assertContentEquals(expectedContent); + } + + @Test + public void testConfigFile() throws IOException, InitializationException { + HashMap valueMap = new HashMap<>(); + valueMap.put(DaffodilResources.DFDL_SCHEMA_FILE.getName(), "src/test/resources/TestDaffodilComponents/digitList.dfdl.xsd"); + valueMap.put(DaffodilResources.CONFIG_FILE.getName(), "src/test/resources/TestDaffodilComponents/testConfig.xml"); + final TestRunner testRunner = setupRunner(ControllerOption.JSON_WRITER, false, valueMap); + testRunner.enqueue("12345"); + testRunner.run(); + testRunner.assertAllFlowFilesTransferred("success"); + final MockFlowFile infoset = testRunner.getFlowFilesForRelationship("success").get(0); + String expectedContent = new String(Files.readAllBytes(Paths.get("src/test/resources/TestDaffodilControllers/simpleList-extVars.txt.json"))); + expectedContent = expectedContent.replaceAll("\"\\$expectedValue\"", "2"); + infoset.assertContentEquals(expectedContent); + } + + @Test + public void 
testStreamModeForJsonParse() throws IOException, InitializationException { + HashMap valueMap = new HashMap<>(); + valueMap.put(DaffodilResources.DFDL_SCHEMA_FILE.getName(), "src/test/resources/TestDaffodilComponents/digitList.dfdl.xsd"); + valueMap.put("stream-mode", AbstractDaffodilController.STREAM_MODE_ALL_SUCCESSFUL.getValue()); + final TestRunner testRunner = setupRunner(ControllerOption.JSON_WRITER, false, valueMap); + testRunner.enqueue("123456789"); + testRunner.run(); + testRunner.assertAllFlowFilesTransferred("success", 1); + final MockFlowFile firstInfoset = testRunner.getFlowFilesForRelationship("success").get(0); + String firstExpected = new String(Files.readAllBytes(Paths.get("src/test/resources/TestDaffodilControllers/simpleList-extraData.txt.json"))); + firstInfoset.assertContentEquals(firstExpected); + } + + @Test + public void testStreamModeForJsonUnparse() throws IOException, InitializationException { + HashMap valueMap = new HashMap<>(); + valueMap.put(DaffodilResources.DFDL_SCHEMA_FILE.getName(), "src/test/resources/TestDaffodilComponents/digitList.dfdl.xsd"); + valueMap.put("stream-mode", AbstractDaffodilController.STREAM_MODE_ALL_SUCCESSFUL.getValue()); + final TestRunner testRunner = setupRunner(ControllerOption.JSON_READER, false, valueMap); + testRunner.enqueue(Paths.get("src/test/resources/TestDaffodilControllers/simpleList-extraData.txt.json")); + testRunner.run(); + testRunner.assertAllFlowFilesTransferred("success", 1); + final MockFlowFile firstInfoset = testRunner.getFlowFilesForRelationship("success").get(0); + firstInfoset.assertContentEquals("123456789"); + } + + @Test + public void testStreamModeForXMLParseUnparse() throws InitializationException { + HashMap valueMap = new HashMap<>(); + valueMap.put(DaffodilResources.DFDL_SCHEMA_FILE.getName(), "src/test/resources/TestDaffodilComponents/digitList.dfdl.xsd"); + valueMap.put("stream-mode", AbstractDaffodilController.STREAM_MODE_ALL_SUCCESSFUL.getValue()); + final TestRunner testRunner = setupRunner(ControllerOption.XML_WRITER, false, true, valueMap); + testRunner.enqueue("123456789"); + testRunner.run(); + testRunner.assertAllFlowFilesTransferred("success", 1); + final MockFlowFile infosetBytes = testRunner.getFlowFilesForRelationship("success").get(0); + final TestRunner testRunner2 = setupRunner(ControllerOption.XML_READER, false, true, valueMap); + testRunner2.enqueue(infosetBytes); + testRunner2.run(); + testRunner2.assertAllFlowFilesTransferred("success", 1); + final MockFlowFile unparsedData = testRunner2.getFlowFilesForRelationship("success").get(0); + unparsedData.assertContentEquals("123456789"); + } + + @Test + public void testSecondRecordFailsAllSuccessful() throws InitializationException { + HashMap valueMap = new HashMap<>(); + valueMap.put(DaffodilResources.DFDL_SCHEMA_FILE.getName(), "src/test/resources/TestDaffodilComponents/digitList.dfdl.xsd"); + valueMap.put("stream-mode", AbstractDaffodilController.STREAM_MODE_ALL_SUCCESSFUL.getValue()); + final TestRunner testRunner = setupRunner(ControllerOption.JSON_WRITER, false, valueMap); + testRunner.enqueue("1234567a9"); + testRunner.run(); + testRunner.assertAllFlowFilesTransferred("failure"); + final MockFlowFile infoset = testRunner.getFlowFilesForRelationship("failure").get(0); + infoset.assertContentEquals("1234567a9"); + } + + @Test + public void testSecondRecordFailsOnlySuccessful() throws InitializationException, IOException { + HashMap valueMap = new HashMap<>(); + valueMap.put(DaffodilResources.DFDL_SCHEMA_FILE.getName(), 
"src/test/resources/TestDaffodilComponents/digitList.dfdl.xsd"); + valueMap.put("stream-mode", AbstractDaffodilController.STREAM_MODE_ONLY_SUCCESSFUL.getValue()); + final TestRunner testRunner = setupRunner(ControllerOption.JSON_WRITER, false, valueMap); + testRunner.enqueue("1234567a9"); + testRunner.run(); + testRunner.assertAllFlowFilesTransferred("success"); + final MockFlowFile infoset = testRunner.getFlowFilesForRelationship("success").get(0); + String expectedContent = new String(Files.readAllBytes(Paths.get("src/test/resources/TestDaffodilControllers/simpleList-extraData.txt.json"))); + expectedContent = expectedContent.replaceAll(",8,9", ""); + infoset.assertContentEquals(expectedContent); + } + + @Test + public void testNestedChoicesValidCase() throws InitializationException { + HashMap valueMap = new HashMap<>(); + valueMap.put(DaffodilResources.DFDL_SCHEMA_FILE.getName(), "src/test/resources/TestDaffodilComponents/nestedChoices.dfdl.xsd"); + final TestRunner testRunner = setupRunner(ControllerOption.JSON_WRITER, false, valueMap); + testRunner.enqueue("2"); + testRunner.run(); + testRunner.assertAllFlowFilesTransferred("success", 1); + final MockFlowFile firstInfoset = testRunner.getFlowFilesForRelationship("success").get(0); + firstInfoset.assertContentEquals("[{\"root\":{\"B\":2}}]"); + } + + @Test + public void testNestedChoicesDefaultCase() throws InitializationException { + HashMap valueMap = new HashMap<>(); + valueMap.put(DaffodilResources.DFDL_SCHEMA_FILE.getName(), "src/test/resources/TestDaffodilComponents/nestedChoices.dfdl.xsd"); + final TestRunner testRunner = setupRunner(ControllerOption.JSON_WRITER, false, valueMap); + testRunner.enqueue("4"); + testRunner.run(); + testRunner.assertAllFlowFilesTransferred("success", 1); + final MockFlowFile firstInfoset = testRunner.getFlowFilesForRelationship("success").get(0); + firstInfoset.assertContentEquals("[{\"root\":{\"D\":4}}]"); + } + +} diff --git a/nifi-daffodil-processors/src/test/java/com/tresys/nifi/processors/TestDaffodilProcessor.java b/nifi-daffodil-processors/src/test/java/com/tresys/nifi/processors/TestDaffodilProcessor.java index 5d9f57e..eebce09 100644 --- a/nifi-daffodil-processors/src/test/java/com/tresys/nifi/processors/TestDaffodilProcessor.java +++ b/nifi-daffodil-processors/src/test/java/com/tresys/nifi/processors/TestDaffodilProcessor.java @@ -16,57 +16,52 @@ package com.tresys.nifi.processors; -import com.tresys.nifi.processors.AbstractDaffodilProcessor.CacheKey; - -import java.io.File; -import java.io.FileOutputStream; import java.io.IOException; -import java.nio.channels.Channels; -import java.nio.channels.WritableByteChannel; -import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Paths; +import java.util.Arrays; +import java.util.List; import java.util.Map; import java.util.HashMap; +import com.tresys.nifi.util.DaffodilResources; import org.apache.nifi.flowfile.attributes.CoreAttributes; import org.apache.nifi.util.MockFlowFile; import org.apache.nifi.util.TestRunner; import org.apache.nifi.util.TestRunners; -import org.junit.Before; import org.junit.Test; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; -import static org.junit.Assert.assertFalse; + +import static org.junit.Assert.*; +import static org.junit.Assert.assertNotEquals; public class TestDaffodilProcessor { @Test - public void testDFDLSchemaNotFound() throws IOException { + public void testDFDLSchemaNotFound() { final TestRunner testRunner = 
TestRunners.newTestRunner(DaffodilParse.class); - testRunner.setProperty(DaffodilParse.DFDL_SCHEMA_FILE, "/does/not/exist.dfdl.xsd"); + testRunner.setProperty(DaffodilResources.DFDL_SCHEMA_FILE, "/does/not/exist.dfdl.xsd"); testRunner.assertNotValid(); } @Test public void testDFDLSchemaNotValid() throws IOException { final TestRunner testRunner = TestRunners.newTestRunner(DaffodilParse.class); - testRunner.setProperty(DaffodilParse.DFDL_SCHEMA_FILE, "src/test/resources/TestDaffodilProcessor/csv-invalid.dfdl.xsd"); - testRunner.enqueue(Paths.get("src/test/resources/TestDaffodilProcessor/tokens.csv")); + testRunner.setProperty(DaffodilResources.DFDL_SCHEMA_FILE, "src/test/resources/TestDaffodilComponents/csv-invalid.dfdl.xsd"); + testRunner.enqueue(Paths.get("src/test/resources/TestDaffodilComponents/tokens.csv")); testRunner.run(); testRunner.assertAllFlowFilesTransferred(DaffodilParse.REL_FAILURE); final MockFlowFile original = testRunner.getFlowFilesForRelationship(DaffodilParse.REL_FAILURE).get(0); - final String expectedContent = new String(Files.readAllBytes(Paths.get("src/test/resources/TestDaffodilProcessor/tokens.csv"))); + final String expectedContent = new String(Files.readAllBytes(Paths.get("src/test/resources/TestDaffodilComponents/tokens.csv"))); original.assertContentEquals(expectedContent); } @Test public void testParseCSV() throws IOException { final TestRunner testRunner = TestRunners.newTestRunner(DaffodilParse.class); - testRunner.setProperty(DaffodilParse.DFDL_SCHEMA_FILE, "src/test/resources/TestDaffodilProcessor/csv.dfdl.xsd"); - testRunner.enqueue(Paths.get("src/test/resources/TestDaffodilProcessor/tokens.csv")); + testRunner.setProperty(DaffodilResources.DFDL_SCHEMA_FILE, "src/test/resources/TestDaffodilComponents/csv.dfdl.xsd"); + testRunner.enqueue(Paths.get("src/test/resources/TestDaffodilComponents/tokens.csv")); testRunner.run(); testRunner.assertAllFlowFilesTransferred(DaffodilParse.REL_SUCCESS); final MockFlowFile infoset = testRunner.getFlowFilesForRelationship(DaffodilParse.REL_SUCCESS).get(0); @@ -78,7 +73,7 @@ public void testParseCSV() throws IOException { @Test public void testParseCSVFail() throws IOException { final TestRunner testRunner = TestRunners.newTestRunner(DaffodilParse.class); - testRunner.setProperty(DaffodilParse.DFDL_SCHEMA_FILE, "src/test/resources/TestDaffodilProcessor/csv.dfdl.xsd"); + testRunner.setProperty(DaffodilResources.DFDL_SCHEMA_FILE, "src/test/resources/TestDaffodilComponents/csv.dfdl.xsd"); // trying to parse the XML file will fail, expects CSV data testRunner.enqueue(Paths.get("src/test/resources/TestDaffodilProcessor/tokens.csv.xml")); testRunner.run(); @@ -88,82 +83,41 @@ public void testParseCSVFail() throws IOException { infoset.assertContentEquals(expectedContent); } - @Test - public void testParseCSVPreCompiled() throws IOException { - File schema = new File("src/test/resources/TestDaffodilProcessor/csv.dfdl.xsd"); - File savedSchema = File.createTempFile("nifi-daffodil-", null); - savedSchema.deleteOnExit(); - FileOutputStream fos = new FileOutputStream(savedSchema); - WritableByteChannel output = Channels.newChannel(fos); - - org.apache.daffodil.japi.Compiler c = org.apache.daffodil.japi.Daffodil.compiler(); - org.apache.daffodil.japi.ProcessorFactory pf = c.compileFile(schema); - org.apache.daffodil.japi.DataProcessor dp = pf.onPath("/"); - dp.save(output); - - output.close(); - fos.close(); - - final TestRunner testRunner = TestRunners.newTestRunner(DaffodilParse.class); - 
testRunner.setProperty(DaffodilParse.DFDL_SCHEMA_FILE, savedSchema.getAbsolutePath()); - testRunner.setProperty(DaffodilParse.PRE_COMPILED_SCHEMA, "true"); - testRunner.enqueue(Paths.get("src/test/resources/TestDaffodilProcessor/tokens.csv")); - testRunner.run(); - testRunner.assertAllFlowFilesTransferred(DaffodilParse.REL_SUCCESS); - final MockFlowFile infoset = testRunner.getFlowFilesForRelationship(DaffodilParse.REL_SUCCESS).get(0); - final String expectedContent = new String(Files.readAllBytes(Paths.get("src/test/resources/TestDaffodilProcessor/tokens.csv.xml"))); - infoset.assertContentEquals(expectedContent); - assertEquals(DaffodilParse.XML_MIME_TYPE, infoset.getAttribute(CoreAttributes.MIME_TYPE.key())); - } - - @Test - public void testParseCSVPreCompiledFail() throws IOException { - final TestRunner testRunner = TestRunners.newTestRunner(DaffodilParse.class); - testRunner.setProperty(DaffodilParse.DFDL_SCHEMA_FILE, "src/test/resources/TestDaffodilProcessor/csv.dfdl.xsd"); - testRunner.setProperty(DaffodilParse.PRE_COMPILED_SCHEMA, "true"); - testRunner.enqueue(Paths.get("src/test/resources/TestDaffodilProcessor/tokens.csv")); - testRunner.run(); - testRunner.assertAllFlowFilesTransferred(DaffodilParse.REL_FAILURE); - final MockFlowFile infoset = testRunner.getFlowFilesForRelationship(DaffodilParse.REL_FAILURE).get(0); - final String expectedContent = new String(Files.readAllBytes(Paths.get("src/test/resources/TestDaffodilProcessor/tokens.csv"))); - infoset.assertContentEquals(expectedContent); - } - @Test public void testUnparseCSV() throws IOException { final TestRunner testRunner = TestRunners.newTestRunner(DaffodilUnparse.class); - testRunner.setProperty(DaffodilUnparse.DFDL_SCHEMA_FILE, "src/test/resources/TestDaffodilProcessor/csv.dfdl.xsd"); + testRunner.setProperty(DaffodilResources.DFDL_SCHEMA_FILE, "src/test/resources/TestDaffodilComponents/csv.dfdl.xsd"); final Map attributes = new HashMap<>(); attributes.put(CoreAttributes.MIME_TYPE.key(), DaffodilUnparse.XML_MIME_TYPE); testRunner.enqueue(Paths.get("src/test/resources/TestDaffodilProcessor/tokens.csv.xml"), attributes); testRunner.run(); testRunner.assertAllFlowFilesTransferred(DaffodilUnparse.REL_SUCCESS); final MockFlowFile infoset = testRunner.getFlowFilesForRelationship(DaffodilUnparse.REL_SUCCESS).get(0); - final String expectedContent = new String(Files.readAllBytes(Paths.get("src/test/resources/TestDaffodilProcessor/tokens.csv"))); + final String expectedContent = new String(Files.readAllBytes(Paths.get("src/test/resources/TestDaffodilComponents/tokens.csv"))); infoset.assertContentEquals(expectedContent.replace("\r\n", "\n")); - assertEquals(null, infoset.getAttribute(CoreAttributes.MIME_TYPE.key())); + assertNull(infoset.getAttribute(CoreAttributes.MIME_TYPE.key())); } @Test public void testUnparseCSVFail() throws IOException { final TestRunner testRunner = TestRunners.newTestRunner(DaffodilUnparse.class); - testRunner.setProperty(DaffodilUnparse.DFDL_SCHEMA_FILE, "src/test/resources/TestDaffodilProcessor/csv.dfdl.xsd"); - // trying to unparse CSV will fail, expectes an XML infoset - testRunner.enqueue(Paths.get("src/test/resources/TestDaffodilProcessor/tokens.csv")); + testRunner.setProperty(DaffodilResources.DFDL_SCHEMA_FILE, "src/test/resources/TestDaffodilComponents/csv.dfdl.xsd"); + // trying to unparse CSV will fail, expects an XML infoset + testRunner.enqueue(Paths.get("src/test/resources/TestDaffodilComponents/tokens.csv")); testRunner.run(); 
testRunner.assertAllFlowFilesTransferred(DaffodilUnparse.REL_FAILURE); final MockFlowFile infoset = testRunner.getFlowFilesForRelationship(DaffodilUnparse.REL_FAILURE).get(0); - final String expectedContent = new String(Files.readAllBytes(Paths.get("src/test/resources/TestDaffodilProcessor/tokens.csv"))); + final String expectedContent = new String(Files.readAllBytes(Paths.get("src/test/resources/TestDaffodilComponents/tokens.csv"))); infoset.assertContentEquals(expectedContent); } @Test public void testExpressionLanguage() throws IOException { final TestRunner testRunner = TestRunners.newTestRunner(DaffodilParse.class); - testRunner.setProperty(DaffodilParse.DFDL_SCHEMA_FILE, "${dfdl.schema}"); + testRunner.setProperty(DaffodilResources.DFDL_SCHEMA_FILE, "${dfdl.schema}"); final Map attributes = new HashMap<>(); - attributes.put("dfdl.schema", "src/test/resources/TestDaffodilProcessor/csv.dfdl.xsd"); - testRunner.enqueue(Paths.get("src/test/resources/TestDaffodilProcessor/tokens.csv"), attributes); + attributes.put("dfdl.schema", "src/test/resources/TestDaffodilComponents/csv.dfdl.xsd"); + testRunner.enqueue(Paths.get("src/test/resources/TestDaffodilComponents/tokens.csv"), attributes); testRunner.run(); testRunner.assertAllFlowFilesTransferred(DaffodilParse.REL_SUCCESS); final MockFlowFile infoset = testRunner.getFlowFilesForRelationship(DaffodilParse.REL_SUCCESS).get(0); @@ -174,9 +128,9 @@ public void testExpressionLanguage() throws IOException { @Test public void testNoCache() throws IOException { final TestRunner testRunner = TestRunners.newTestRunner(DaffodilParse.class); - testRunner.setProperty(DaffodilParse.DFDL_SCHEMA_FILE, "src/test/resources/TestDaffodilProcessor/csv.dfdl.xsd"); - testRunner.setProperty(DaffodilParse.CACHE_SIZE, "0"); - testRunner.enqueue(Paths.get("src/test/resources/TestDaffodilProcessor/tokens.csv")); + testRunner.setProperty(DaffodilResources.DFDL_SCHEMA_FILE, "src/test/resources/TestDaffodilComponents/csv.dfdl.xsd"); + testRunner.setProperty(DaffodilResources.CACHE_SIZE, "0"); + testRunner.enqueue(Paths.get("src/test/resources/TestDaffodilComponents/tokens.csv")); testRunner.run(); testRunner.assertAllFlowFilesTransferred(DaffodilParse.REL_SUCCESS); final MockFlowFile infoset = testRunner.getFlowFilesForRelationship(DaffodilParse.REL_SUCCESS).get(0); @@ -187,9 +141,9 @@ public void testNoCache() throws IOException { @Test public void testParseCSVJson() throws IOException { final TestRunner testRunner = TestRunners.newTestRunner(DaffodilParse.class); - testRunner.setProperty(DaffodilParse.DFDL_SCHEMA_FILE, "src/test/resources/TestDaffodilProcessor/csv.dfdl.xsd"); + testRunner.setProperty(DaffodilResources.DFDL_SCHEMA_FILE, "src/test/resources/TestDaffodilComponents/csv.dfdl.xsd"); testRunner.setProperty("infoset-type", DaffodilParse.JSON_VALUE); - testRunner.enqueue(Paths.get("src/test/resources/TestDaffodilProcessor/tokens.csv")); + testRunner.enqueue(Paths.get("src/test/resources/TestDaffodilComponents/tokens.csv")); testRunner.run(); testRunner.assertAllFlowFilesTransferred(DaffodilParse.REL_SUCCESS); final MockFlowFile infoset = testRunner.getFlowFilesForRelationship(DaffodilParse.REL_SUCCESS).get(0); @@ -201,7 +155,7 @@ public void testParseCSVJson() throws IOException { @Test public void testUnparseCSVJson() throws IOException { final TestRunner testRunner = TestRunners.newTestRunner(DaffodilUnparse.class); - testRunner.setProperty(DaffodilUnparse.DFDL_SCHEMA_FILE, "src/test/resources/TestDaffodilProcessor/csv.dfdl.xsd"); + 
testRunner.setProperty(DaffodilResources.DFDL_SCHEMA_FILE, "src/test/resources/TestDaffodilComponents/csv.dfdl.xsd"); testRunner.setProperty("infoset-type", DaffodilUnparse.JSON_VALUE); final Map attributes = new HashMap<>(); attributes.put(CoreAttributes.MIME_TYPE.key(), DaffodilUnparse.JSON_MIME_TYPE); @@ -209,15 +163,15 @@ public void testUnparseCSVJson() throws IOException { testRunner.run(); testRunner.assertAllFlowFilesTransferred(DaffodilUnparse.REL_SUCCESS); final MockFlowFile infoset = testRunner.getFlowFilesForRelationship(DaffodilUnparse.REL_SUCCESS).get(0); - final String expectedContent = new String(Files.readAllBytes(Paths.get("src/test/resources/TestDaffodilProcessor/tokens.csv"))); + final String expectedContent = new String(Files.readAllBytes(Paths.get("src/test/resources/TestDaffodilComponents/tokens.csv"))); infoset.assertContentEquals(expectedContent.replace("\r\n", "\n")); - assertEquals(null, infoset.getAttribute(CoreAttributes.MIME_TYPE.key())); + assertNull(infoset.getAttribute(CoreAttributes.MIME_TYPE.key())); } @Test - public void testParseCSVAttributeInvalid() throws IOException { + public void testParseCSVAttributeInvalid() { final TestRunner testRunner = TestRunners.newTestRunner(DaffodilParse.class); - testRunner.setProperty(DaffodilParse.DFDL_SCHEMA_FILE, "src/test/resources/TestDaffodilProcessor/csv.dfdl.xsd"); + testRunner.setProperty(DaffodilResources.DFDL_SCHEMA_FILE, "src/test/resources/TestDaffodilComponents/csv.dfdl.xsd"); testRunner.setProperty("infoset-type", DaffodilParse.ATTRIBUTE_VALUE); testRunner.assertNotValid(); } @@ -225,7 +179,7 @@ public void testParseCSVAttributeInvalid() throws IOException { @Test public void testUnparseCSVAttributeJSON() throws IOException { final TestRunner testRunner = TestRunners.newTestRunner(DaffodilUnparse.class); - testRunner.setProperty(DaffodilUnparse.DFDL_SCHEMA_FILE, "src/test/resources/TestDaffodilProcessor/csv.dfdl.xsd"); + testRunner.setProperty(DaffodilResources.DFDL_SCHEMA_FILE, "src/test/resources/TestDaffodilComponents/csv.dfdl.xsd"); testRunner.setProperty("infoset-type", DaffodilUnparse.ATTRIBUTE_VALUE); final Map attributes = new HashMap<>(); attributes.put(CoreAttributes.MIME_TYPE.key(), DaffodilUnparse.JSON_MIME_TYPE); @@ -233,15 +187,15 @@ public void testUnparseCSVAttributeJSON() throws IOException { testRunner.run(); testRunner.assertAllFlowFilesTransferred(DaffodilUnparse.REL_SUCCESS); final MockFlowFile infoset = testRunner.getFlowFilesForRelationship(DaffodilUnparse.REL_SUCCESS).get(0); - final String expectedContent = new String(Files.readAllBytes(Paths.get("src/test/resources/TestDaffodilProcessor/tokens.csv"))); + final String expectedContent = new String(Files.readAllBytes(Paths.get("src/test/resources/TestDaffodilComponents/tokens.csv"))); infoset.assertContentEquals(expectedContent.replace("\r\n", "\n")); - assertEquals(null, infoset.getAttribute(CoreAttributes.MIME_TYPE.key())); + assertNull(infoset.getAttribute(CoreAttributes.MIME_TYPE.key())); } @Test public void testUnparseCSVAttributeXML() throws IOException { final TestRunner testRunner = TestRunners.newTestRunner(DaffodilUnparse.class); - testRunner.setProperty(DaffodilUnparse.DFDL_SCHEMA_FILE, "src/test/resources/TestDaffodilProcessor/csv.dfdl.xsd"); + testRunner.setProperty(DaffodilResources.DFDL_SCHEMA_FILE, "src/test/resources/TestDaffodilComponents/csv.dfdl.xsd"); testRunner.setProperty("infoset-type", DaffodilUnparse.ATTRIBUTE_VALUE); final Map attributes = new HashMap<>(); attributes.put(CoreAttributes.MIME_TYPE.key(), 
DaffodilUnparse.XML_MIME_TYPE); @@ -249,15 +203,15 @@ public void testUnparseCSVAttributeXML() throws IOException { testRunner.run(); testRunner.assertAllFlowFilesTransferred(DaffodilUnparse.REL_SUCCESS); final MockFlowFile infoset = testRunner.getFlowFilesForRelationship(DaffodilUnparse.REL_SUCCESS).get(0); - final String expectedContent = new String(Files.readAllBytes(Paths.get("src/test/resources/TestDaffodilProcessor/tokens.csv"))); + final String expectedContent = new String(Files.readAllBytes(Paths.get("src/test/resources/TestDaffodilComponents/tokens.csv"))); infoset.assertContentEquals(expectedContent.replace("\r\n", "\n")); - assertEquals(null, infoset.getAttribute(CoreAttributes.MIME_TYPE.key())); + assertNull(infoset.getAttribute(CoreAttributes.MIME_TYPE.key())); } @Test public void testUnparseCSVAttributeUndefined() throws IOException { final TestRunner testRunner = TestRunners.newTestRunner(DaffodilUnparse.class); - testRunner.setProperty(DaffodilUnparse.DFDL_SCHEMA_FILE, "src/test/resources/TestDaffodilProcessor/csv.dfdl.xsd"); + testRunner.setProperty(DaffodilResources.DFDL_SCHEMA_FILE, "src/test/resources/TestDaffodilComponents/csv.dfdl.xsd"); testRunner.setProperty("infoset-type", DaffodilUnparse.ATTRIBUTE_VALUE); final Map attributes = new HashMap<>(); testRunner.enqueue(Paths.get("src/test/resources/TestDaffodilProcessor/tokens.csv.xml"), attributes); @@ -271,7 +225,7 @@ public void testUnparseCSVAttributeUndefined() throws IOException { @Test public void testUnparseCSVAttributeUnknown() throws IOException { final TestRunner testRunner = TestRunners.newTestRunner(DaffodilUnparse.class); - testRunner.setProperty(DaffodilUnparse.DFDL_SCHEMA_FILE, "src/test/resources/TestDaffodilProcessor/csv.dfdl.xsd"); + testRunner.setProperty(DaffodilResources.DFDL_SCHEMA_FILE, "src/test/resources/TestDaffodilComponents/csv.dfdl.xsd"); testRunner.setProperty("infoset-type", DaffodilUnparse.ATTRIBUTE_VALUE); final Map attributes = new HashMap<>(); attributes.put(CoreAttributes.MIME_TYPE.key(), "application/unknown"); @@ -287,20 +241,20 @@ public void testUnparseCSVAttributeUnknown() throws IOException { @Test public void testParseLeftOverData() throws IOException { final TestRunner testRunner = TestRunners.newTestRunner(DaffodilParse.class); - testRunner.setProperty(DaffodilParse.DFDL_SCHEMA_FILE, "src/test/resources/TestDaffodilProcessor/bitlength.dfdl.xsd"); - testRunner.enqueue(Paths.get("src/test/resources/TestDaffodilProcessor/leftover.bin")); + testRunner.setProperty(DaffodilResources.DFDL_SCHEMA_FILE, "src/test/resources/TestDaffodilComponents/bitlength.dfdl.xsd"); + testRunner.enqueue(Paths.get("src/test/resources/TestDaffodilComponents/leftover.bin")); testRunner.run(); testRunner.assertAllFlowFilesTransferred(DaffodilParse.REL_FAILURE); final MockFlowFile original = testRunner.getFlowFilesForRelationship(DaffodilParse.REL_FAILURE).get(0); - final byte[] expectedContent = Files.readAllBytes(Paths.get("src/test/resources/TestDaffodilProcessor/leftover.bin")); + final byte[] expectedContent = Files.readAllBytes(Paths.get("src/test/resources/TestDaffodilComponents/leftover.bin")); original.assertContentEquals(expectedContent); } @Test public void testParseNoLeftOverData() throws IOException { final TestRunner testRunner = TestRunners.newTestRunner(DaffodilParse.class); - testRunner.setProperty(DaffodilParse.DFDL_SCHEMA_FILE, "src/test/resources/TestDaffodilProcessor/bitlength.dfdl.xsd"); - testRunner.enqueue(Paths.get("src/test/resources/TestDaffodilProcessor/noleftover.bin")); + 
testRunner.setProperty(DaffodilResources.DFDL_SCHEMA_FILE, "src/test/resources/TestDaffodilComponents/bitlength.dfdl.xsd"); + testRunner.enqueue(Paths.get("src/test/resources/TestDaffodilComponents/noleftover.bin")); testRunner.run(); testRunner.assertAllFlowFilesTransferred(DaffodilParse.REL_SUCCESS); final MockFlowFile infoset = testRunner.getFlowFilesForRelationship(DaffodilParse.REL_SUCCESS).get(0); @@ -311,49 +265,131 @@ public void testParseNoLeftOverData() throws IOException { @Test public void testParseCSVValidationLimited() throws IOException { final TestRunner testRunner = TestRunners.newTestRunner(DaffodilParse.class); - testRunner.setProperty(DaffodilParse.DFDL_SCHEMA_FILE, "src/test/resources/TestDaffodilProcessor/csv.dfdl.xsd"); - testRunner.setProperty(DaffodilParse.VALIDATION_MODE, DaffodilParse.LIMITED_VALUE); - testRunner.enqueue(Paths.get("src/test/resources/TestDaffodilProcessor/tokens.csv")); + testRunner.setProperty(DaffodilResources.DFDL_SCHEMA_FILE, "src/test/resources/TestDaffodilComponents/csv.dfdl.xsd"); + testRunner.setProperty(DaffodilResources.VALIDATION_MODE, DaffodilResources.LIMITED_VALUE); + testRunner.enqueue(Paths.get("src/test/resources/TestDaffodilComponents/tokens.csv")); testRunner.run(); testRunner.assertAllFlowFilesTransferred(DaffodilParse.REL_FAILURE); final MockFlowFile infoset = testRunner.getFlowFilesForRelationship(DaffodilParse.REL_FAILURE).get(0); - final String expectedContent = new String(Files.readAllBytes(Paths.get("src/test/resources/TestDaffodilProcessor/tokens.csv"))); + final String expectedContent = new String(Files.readAllBytes(Paths.get("src/test/resources/TestDaffodilComponents/tokens.csv"))); infoset.assertContentEquals(expectedContent); } @Test public void testParseCSVValidationFull() throws IOException { final TestRunner testRunner = TestRunners.newTestRunner(DaffodilParse.class); - testRunner.setProperty(DaffodilParse.DFDL_SCHEMA_FILE, "src/test/resources/TestDaffodilProcessor/csv.dfdl.xsd"); - testRunner.setProperty(DaffodilParse.VALIDATION_MODE, DaffodilParse.FULL_VALUE); - testRunner.enqueue(Paths.get("src/test/resources/TestDaffodilProcessor/tokens.csv")); + testRunner.setProperty(DaffodilResources.DFDL_SCHEMA_FILE, "src/test/resources/TestDaffodilComponents/csv.dfdl.xsd"); + testRunner.setProperty(DaffodilResources.VALIDATION_MODE, DaffodilResources.FULL_VALUE); + testRunner.enqueue(Paths.get("src/test/resources/TestDaffodilComponents/tokens.csv")); testRunner.run(); testRunner.assertAllFlowFilesTransferred(DaffodilParse.REL_FAILURE); final MockFlowFile infoset = testRunner.getFlowFilesForRelationship(DaffodilParse.REL_FAILURE).get(0); - final String expectedContent = new String(Files.readAllBytes(Paths.get("src/test/resources/TestDaffodilProcessor/tokens.csv"))); + final String expectedContent = new String(Files.readAllBytes(Paths.get("src/test/resources/TestDaffodilComponents/tokens.csv"))); infoset.assertContentEquals(expectedContent); } @Test - public void testCacheKeyEquality() { - CacheKey ck1 = new CacheKey("one", false); - CacheKey ck2 = new CacheKey("one", false); - CacheKey ck3 = new CacheKey("two", false); - CacheKey ck4 = new CacheKey("one", true); - CacheKey ck5 = new CacheKey("two", true); + public void testTunableParameters() { + final TestRunner testRunner = TestRunners.newTestRunner(DaffodilParse.class); + testRunner.setProperty(DaffodilResources.DFDL_SCHEMA_FILE, "src/test/resources/TestDaffodilComponents/digitList.dfdl.xsd"); + testRunner.setProperty("+maxOccursBounds", "4"); + 
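// with maxOccursBounds lowered to 4, the five-digit input exceeds the tunable limit and the parse fails +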
testRunner.enqueue("12345"); + testRunner.run(); + testRunner.assertAllFlowFilesTransferred("failure"); + final MockFlowFile infoset = testRunner.getFlowFilesForRelationship("failure").get(0); + infoset.assertContentEquals("12345"); + } - assertTrue(ck1 != ck2); - assertTrue(ck1.equals(ck2)); - assertTrue(ck1.hashCode() == ck2.hashCode()); + @Test + public void testExternalVariables() throws IOException { + final TestRunner testRunner = TestRunners.newTestRunner(DaffodilParse.class); + testRunner.setProperty(DaffodilResources.DFDL_SCHEMA_FILE, "src/test/resources/TestDaffodilComponents/digitList.dfdl.xsd"); + testRunner.setProperty("infoset-type", DaffodilParse.JSON_VALUE); + testRunner.setProperty("valueToOverride", "1"); + testRunner.enqueue("12345"); + testRunner.run(); + testRunner.assertAllFlowFilesTransferred("success"); + final MockFlowFile infoset = testRunner.getFlowFilesForRelationship("success").get(0); + String expectedContent = new String(Files.readAllBytes(Paths.get("src/test/resources/TestDaffodilProcessor/simpleList-extVars.txt.json"))); + expectedContent = expectedContent.replaceAll("\\$expectedValue", "1"); + infoset.assertContentEquals(expectedContent); + } - assertFalse(ck1.equals(ck3)); - assertFalse(ck1.hashCode() == ck3.hashCode()); + @Test + public void testConfigFile() throws IOException { + final TestRunner testRunner = TestRunners.newTestRunner(DaffodilParse.class); + testRunner.setProperty(DaffodilResources.DFDL_SCHEMA_FILE, "src/test/resources/TestDaffodilComponents/digitList.dfdl.xsd"); + testRunner.setProperty("infoset-type", DaffodilParse.JSON_VALUE); + testRunner.setProperty(DaffodilResources.CONFIG_FILE, "src/test/resources/TestDaffodilComponents/testConfig.xml"); + testRunner.enqueue("12345"); + testRunner.run(); + testRunner.assertAllFlowFilesTransferred("success"); + final MockFlowFile infoset = testRunner.getFlowFilesForRelationship("success").get(0); + String expectedContent = new String(Files.readAllBytes(Paths.get("src/test/resources/TestDaffodilProcessor/simpleList-extVars.txt.json"))); + expectedContent = expectedContent.replaceAll("\\$expectedValue", "2"); + infoset.assertContentEquals(expectedContent); + } - assertFalse(ck1.equals(ck4)); - assertFalse(ck1.hashCode() == ck4.hashCode()); + @Test + public void testNestedChoicesValidCase() { + final TestRunner testRunner = TestRunners.newTestRunner(DaffodilParse.class); + testRunner.setProperty(DaffodilResources.DFDL_SCHEMA_FILE, "src/test/resources/TestDaffodilComponents/nestedChoices.dfdl.xsd"); + testRunner.setProperty("infoset-type", DaffodilUnparse.JSON_VALUE); + testRunner.enqueue("2"); + testRunner.run(); + testRunner.assertAllFlowFilesTransferred("success", 1); + final MockFlowFile firstInfoset = testRunner.getFlowFilesForRelationship("success").get(0); + firstInfoset.assertContentEquals("{\"root\": {\"B\": \"2\"}}"); + } + + @Test + public void testNestedChoicesDefaultCase() { + final TestRunner testRunner = TestRunners.newTestRunner(DaffodilParse.class); + testRunner.setProperty(DaffodilResources.DFDL_SCHEMA_FILE, "src/test/resources/TestDaffodilComponents/nestedChoices.dfdl.xsd"); + testRunner.setProperty("infoset-type", DaffodilUnparse.JSON_VALUE); + testRunner.enqueue("4"); + testRunner.run(); + testRunner.assertAllFlowFilesTransferred("success", 1); + final MockFlowFile firstInfoset = testRunner.getFlowFilesForRelationship("success").get(0); + firstInfoset.assertContentEquals("{\"root\": {\"D\": \"4\"}}"); + } + + @Test + public void testCacheKey() { + List schemas = Arrays.asList("a", 
"b"); + List validationModes = Arrays.asList("a", "b"); + List> extVarMaps = Arrays.asList(new HashMap<>(), new HashMap<>()); + extVarMaps.get(0).put("a", "b"); + extVarMaps.get(1).put("a", "c"); + List> tunableMaps = Arrays.asList(new HashMap<>(), new HashMap<>()); + tunableMaps.get(0).put("a", "b"); + tunableMaps.get(1).put("a", "c"); + List configFiles = Arrays.asList("a", "b"); + + DaffodilResources.CacheKey baseCacheKey = new DaffodilResources.CacheKey( + schemas.get(0), validationModes.get(0), extVarMaps.get(0), + tunableMaps.get(0), configFiles.get(0) + ); + + for (int i = 0; i < 32; i++) { + String schema = schemas.get(i & 1); + String validationMode = validationModes.get((i & 0b10) >> 1); + HashMap extVarMap = extVarMaps.get((i & 0b100) >> 2); + HashMap tunableMap = tunableMaps.get((i & 0b1000) >> 3); + String configFile = configFiles.get((i & 0b10000) >> 4); + + DaffodilResources.CacheKey otherCacheKey = new DaffodilResources.CacheKey(schema, validationMode, extVarMap, tunableMap, configFile); + + assertNotSame(baseCacheKey, otherCacheKey); + if (i == 0) { + assertEquals(baseCacheKey, otherCacheKey); + assertEquals(baseCacheKey.hashCode(), otherCacheKey.hashCode()); + } else { + assertNotEquals(baseCacheKey, otherCacheKey); + assertNotEquals(baseCacheKey.hashCode(), otherCacheKey.hashCode()); + } + } - assertFalse(ck1.equals(ck5)); - assertFalse(ck1.hashCode() == ck5.hashCode()); } } diff --git a/nifi-daffodil-processors/src/test/java/com/tresys/nifi/schema/TestDFDLRecordSchema.java b/nifi-daffodil-processors/src/test/java/com/tresys/nifi/schema/TestDFDLRecordSchema.java new file mode 100644 index 0000000..9a80d76 --- /dev/null +++ b/nifi-daffodil-processors/src/test/java/com/tresys/nifi/schema/TestDFDLRecordSchema.java @@ -0,0 +1,75 @@ +/* + * Copyright 2020 Nteligen, LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
diff --git a/nifi-daffodil-processors/src/test/java/com/tresys/nifi/schema/TestDFDLRecordSchema.java b/nifi-daffodil-processors/src/test/java/com/tresys/nifi/schema/TestDFDLRecordSchema.java
new file mode 100644
index 0000000..9a80d76
--- /dev/null
+++ b/nifi-daffodil-processors/src/test/java/com/tresys/nifi/schema/TestDFDLRecordSchema.java
@@ -0,0 +1,75 @@
+/*
+ * Copyright 2020 Nteligen, LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.tresys.nifi.schema;
+
+import org.apache.daffodil.japi.Daffodil;
+import org.apache.daffodil.japi.ProcessorFactory;
+import org.apache.nifi.serialization.record.RecordSchema;
+import org.apache.nifi.serialization.record.type.ChoiceDataType;
+import org.apache.nifi.serialization.record.type.RecordDataType;
+import org.junit.Test;
+
+import java.io.File;
+import java.io.IOException;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.assertFalse;
+
+public class TestDFDLRecordSchema {
+
+    private RecordSchema rootSchema;
+    private void setup(String pathToDFDLSchema) throws IOException {
+        File schemaFile = new File(pathToDFDLSchema);
+        assertTrue(schemaFile.exists());
+        RecordWalker walker = new RecordWalker();
+        ProcessorFactory factory = Daffodil.compiler().compileFile(schemaFile);
+        assertFalse(factory.getDiagnostics().toString(), factory.isError());
+        walker.walkFromRoot(factory.experimental().rootView());
+        RecordSchema obtainedSchema = walker.result();
+        assertEquals(1, obtainedSchema.getFieldCount());
+        assertEquals("root", obtainedSchema.getField(0).getFieldName());
+        rootSchema = ((RecordDataType) obtainedSchema.getField(0).getDataType()).getChildSchema();
+    }
+
+    @Test
+    public void testChoiceWithGroupRef() throws IOException {
+        setup("src/test/resources/TestDFDLRecordSchema/choiceWithGroupRef.dfdl.xsd");
+        assertEquals(1, rootSchema.getFieldCount());
+        assertEquals("CHOICE[]", rootSchema.getField(0).getFieldName());
+        assertTrue(rootSchema.getField(0).getDataType() instanceof ChoiceDataType);
+        ChoiceDataType anonChoice = (ChoiceDataType) rootSchema.getField(0).getDataType();
+        assertEquals(2, anonChoice.getPossibleSubTypes().size());
+        assertTrue(anonChoice.getPossibleSubTypes().get(0) instanceof RecordDataType);
+        assertTrue(anonChoice.getPossibleSubTypes().get(1) instanceof RecordDataType);
+        RecordDataType field1 = (RecordDataType) anonChoice.getPossibleSubTypes().get(0);
+        RecordDataType field2 = (RecordDataType) anonChoice.getPossibleSubTypes().get(1);
+        assertEquals("field1", field1.getChildSchema().getField(0).getFieldName());
+        assertEquals("field2", field2.getChildSchema().getField(0).getFieldName());
+    }
+
+    @Test
+    public void testOptionalField() throws IOException {
+        setup("src/test/resources/TestDFDLRecordSchema/optionalField.dfdl.xsd");
+        assertEquals(2, rootSchema.getFieldCount());
+        assertEquals("imOptional", rootSchema.getField(0).getFieldName());
+        assertTrue(rootSchema.getField(0) instanceof OptionalRecordField);
+        assertEquals("imRequired", rootSchema.getField(1).getFieldName());
+        assertFalse(rootSchema.getField(1) instanceof OptionalRecordField);
+    }
+
+}
diff --git a/nifi-daffodil-processors/src/test/resources/TestDFDLRecordSchema/choiceWithGroupRef.dfdl.xsd b/nifi-daffodil-processors/src/test/resources/TestDFDLRecordSchema/choiceWithGroupRef.dfdl.xsd
new file mode 100644
index 0000000..ce3d84a
--- /dev/null
+++ b/nifi-daffodil-processors/src/test/resources/TestDFDLRecordSchema/choiceWithGroupRef.dfdl.xsd
@@ -0,0 +1,61 @@
[61 added lines of XSD markup stripped in extraction]
diff --git a/nifi-daffodil-processors/src/test/resources/TestDFDLRecordSchema/optionalField.dfdl.xsd b/nifi-daffodil-processors/src/test/resources/TestDFDLRecordSchema/optionalField.dfdl.xsd
new file mode 100644
index 0000000..81e66dc
--- /dev/null
+++ b/nifi-daffodil-processors/src/test/resources/TestDFDLRecordSchema/optionalField.dfdl.xsd
@@ -0,0 +1,55 @@
[55 added lines of XSD markup stripped in extraction]
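Since TestDFDLRecordSchema inspects the RecordSchema produced by RecordWalker one field at a time, a schema dump can speed up debugging when one of these assertions fails. A small helper sketch, not part of the patch, built only on NiFi's public record API:

    import org.apache.nifi.serialization.record.DataType;
    import org.apache.nifi.serialization.record.RecordField;
    import org.apache.nifi.serialization.record.RecordSchema;
    import org.apache.nifi.serialization.record.type.ChoiceDataType;
    import org.apache.nifi.serialization.record.type.RecordDataType;

    public class SchemaPrinter {

        // Prints each field name and type, recursing into nested records
        // and into the alternatives of choice types.
        public static void print(RecordSchema schema, int depth) {
            String indent = new String(new char[depth * 2]).replace('\0', ' ');
            for (RecordField field : schema.getFields()) {
                System.out.println(indent + field.getFieldName() + " : " + field.getDataType().getFieldType());
                descend(field.getDataType(), depth + 1);
            }
        }

        private static void descend(DataType type, int depth) {
            if (type instanceof RecordDataType) {
                print(((RecordDataType) type).getChildSchema(), depth);
            } else if (type instanceof ChoiceDataType) {
                for (DataType alternative : ((ChoiceDataType) type).getPossibleSubTypes()) {
                    descend(alternative, depth);
                }
            }
        }
    }

Calling print(rootSchema, 0) inside setup() would, for example, show the anonymous "CHOICE[]" field and its two record alternatives for the choiceWithGroupRef schema.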
diff --git a/nifi-daffodil-processors/src/test/resources/TestDaffodilProcessor/bitlength.dfdl.xsd b/nifi-daffodil-processors/src/test/resources/TestDaffodilComponents/bitlength.dfdl.xsd
similarity index 99%
rename from nifi-daffodil-processors/src/test/resources/TestDaffodilProcessor/bitlength.dfdl.xsd
rename to nifi-daffodil-processors/src/test/resources/TestDaffodilComponents/bitlength.dfdl.xsd
index 0e65415..accc638 100644
--- a/nifi-daffodil-processors/src/test/resources/TestDaffodilProcessor/bitlength.dfdl.xsd
+++ b/nifi-daffodil-processors/src/test/resources/TestDaffodilComponents/bitlength.dfdl.xsd
@@ -29,7 +29,7 @@
[one-line XML change; markup stripped in extraction]
diff --git a/nifi-daffodil-processors/src/test/resources/TestDaffodilProcessor/leftover.bin b/nifi-daffodil-processors/src/test/resources/TestDaffodilComponents/leftover.bin
similarity index 100%
rename from nifi-daffodil-processors/src/test/resources/TestDaffodilProcessor/leftover.bin
rename to nifi-daffodil-processors/src/test/resources/TestDaffodilComponents/leftover.bin
diff --git a/nifi-daffodil-processors/src/test/resources/TestDaffodilComponents/nestedChoices.dfdl.xsd b/nifi-daffodil-processors/src/test/resources/TestDaffodilComponents/nestedChoices.dfdl.xsd
new file mode 100644
index 0000000..4c071fc
--- /dev/null
+++ b/nifi-daffodil-processors/src/test/resources/TestDaffodilComponents/nestedChoices.dfdl.xsd
@@ -0,0 +1,57 @@
[57 added lines of XSD markup stripped in extraction]
diff --git a/nifi-daffodil-processors/src/test/resources/TestDaffodilProcessor/noleftover.bin b/nifi-daffodil-processors/src/test/resources/TestDaffodilComponents/noleftover.bin
similarity index 100%
rename from nifi-daffodil-processors/src/test/resources/TestDaffodilProcessor/noleftover.bin
rename to nifi-daffodil-processors/src/test/resources/TestDaffodilComponents/noleftover.bin
diff --git a/nifi-daffodil-processors/src/test/resources/TestDaffodilComponents/testConfig.xml b/nifi-daffodil-processors/src/test/resources/TestDaffodilComponents/testConfig.xml
new file mode 100644
index 0000000..ecd3d61
--- /dev/null
+++ b/nifi-daffodil-processors/src/test/resources/TestDaffodilComponents/testConfig.xml
@@ -0,0 +1,5 @@
[XML markup stripped in extraction; surviving text: 2]
diff --git a/nifi-daffodil-processors/src/test/resources/TestDaffodilProcessor/tokens.csv b/nifi-daffodil-processors/src/test/resources/TestDaffodilComponents/tokens.csv
similarity index 100%
rename from nifi-daffodil-processors/src/test/resources/TestDaffodilProcessor/tokens.csv
rename to nifi-daffodil-processors/src/test/resources/TestDaffodilComponents/tokens.csv
diff --git a/nifi-daffodil-processors/src/test/resources/TestDaffodilControllers/noleftover.bin.xml b/nifi-daffodil-processors/src/test/resources/TestDaffodilControllers/noleftover.bin.xml
new file mode 100644
index 0000000..cdcf737
--- /dev/null
+++ b/nifi-daffodil-processors/src/test/resources/TestDaffodilControllers/noleftover.bin.xml
@@ -0,0 +1 @@
+1657297
\ No newline at end of file
diff --git a/nifi-daffodil-processors/src/test/resources/TestDaffodilControllers/simpleList-extVars.txt.json b/nifi-daffodil-processors/src/test/resources/TestDaffodilControllers/simpleList-extVars.txt.json
new file mode 100644
index 0000000..b707dbe
--- /dev/null
+++ b/nifi-daffodil-processors/src/test/resources/TestDaffodilControllers/simpleList-extVars.txt.json
@@ -0,0 +1 @@
+[{"elem":{"a":[1,2,3,4,5],"varValue":"$expectedValue"}}]
\ No newline at end of file
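The simpleList-extVars resources above embed a literal $expectedValue token that the tests substitute via replaceAll("\\$expectedValue", ...). The double escape is needed because String.replaceAll treats its first argument as a regular expression, in which a bare $ is an end-of-input anchor; the unescaped pattern "$expectedValue" would therefore never match. A self-contained illustration (the template string here is invented for the example):

    public class PlaceholderDemo {
        public static void main(String[] args) {
            String template = "{\"elem\":{\"varValue\":\"$expectedValue\"}}";
            // "\\$" in Java source is the regex \$ , a literal dollar sign,
            // so the placeholder text matches and is replaced.
            String filled = template.replaceAll("\\$expectedValue", "2");
            System.out.println(filled); // {"elem":{"varValue":"2"}}
        }
    }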
diff --git a/nifi-daffodil-processors/src/test/resources/TestDaffodilControllers/simpleList-extraData.txt.json b/nifi-daffodil-processors/src/test/resources/TestDaffodilControllers/simpleList-extraData.txt.json
new file mode 100644
index 0000000..1e7f259
--- /dev/null
+++ b/nifi-daffodil-processors/src/test/resources/TestDaffodilControllers/simpleList-extraData.txt.json
@@ -0,0 +1 @@
+[{"elem":{"a":[1,2,3,4,5],"varValue":0}},{"elem":{"a":[6,7,8,9],"varValue":0}}]
\ No newline at end of file
diff --git a/nifi-daffodil-processors/src/test/resources/TestDaffodilControllers/tokens.csv.json b/nifi-daffodil-processors/src/test/resources/TestDaffodilControllers/tokens.csv.json
new file mode 100644
index 0000000..72ac932
--- /dev/null
+++ b/nifi-daffodil-processors/src/test/resources/TestDaffodilControllers/tokens.csv.json
@@ -0,0 +1 @@
+[{"file":{"header":{"title":["TOKEN1","TOKEN2","TOKEN3","TOKEN4","TOKEN5","TOKEN6","TOKEN7","TOKEN8","TOKEN9","TOKEN10","TOKEN11"]},"record":[{"item":["1","2","3","4","C:\\dir$abc","6","7","A,B","2014-05-01T30:23:00Z","11","12"]}]}}]
\ No newline at end of file
diff --git a/nifi-daffodil-processors/src/test/resources/TestDaffodilControllers/tokens.csv.xml b/nifi-daffodil-processors/src/test/resources/TestDaffodilControllers/tokens.csv.xml
new file mode 100644
index 0000000..0f8d870
--- /dev/null
+++ b/nifi-daffodil-processors/src/test/resources/TestDaffodilControllers/tokens.csv.xml
@@ -0,0 +1 @@
[XML element markup stripped in extraction; surviving text below]
+TOKEN1TOKEN2TOKEN3TOKEN4TOKEN5TOKEN6TOKEN7TOKEN8TOKEN9TOKEN10TOKEN11
+1234C:\dir$abc67A,B2014-05-01T30:23:00Z1112
\ No newline at end of file
diff --git a/nifi-daffodil-processors/src/test/resources/TestDaffodilProcessor/simpleList-extVars.txt.json b/nifi-daffodil-processors/src/test/resources/TestDaffodilProcessor/simpleList-extVars.txt.json
new file mode 100644
index 0000000..4351f40
--- /dev/null
+++ b/nifi-daffodil-processors/src/test/resources/TestDaffodilProcessor/simpleList-extVars.txt.json
@@ -0,0 +1 @@
+{"elem": {"a": ["1","2","3","4","5"],"varValue": "$expectedValue"}}
\ No newline at end of file
diff --git a/pom.xml b/pom.xml
index 7cfdfd7..e48824b 100644
--- a/pom.xml
+++ b/pom.xml
@@ -13,18 +13,24 @@
 See the License for the specific language governing permissions and
 limitations under the License.
 -->
-[old <project> opening tag; markup stripped in extraction]
+[new <project> opening tag; markup stripped in extraction]
     <modelVersion>4.0.0</modelVersion>
+
+    [properties block added; element names stripped in extraction, values 1.8, 1.8, and 1.11.4]
+
     <parent>
         <groupId>org.apache.nifi</groupId>
         <artifactId>nifi-nar-bundles</artifactId>
-        <version>1.3.0</version>
+        <version>1.11.4</version>
     </parent>

     <groupId>com.tresys</groupId>
     <artifactId>nifi-daffodil</artifactId>
-    <version>1.5</version>
+    <version>2.0</version>
     <packaging>pom</packaging>
@@ -32,13 +38,4 @@
         <module>nifi-daffodil-nar</module>
     </modules>
-
-    <repositories>
-        <repository>
-            <id>Apache-Staging</id>
-            <name>Apache Staging</name>
-            <url>https://repository.apache.org/content/repositories/orgapachedaffodil-1012</url>
-        </repository>
-    </repositories>
-
 </project>
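Finally, the renamed TestDaffodilComponents resources serve both the processor tests and the new controller tests, and they lend themselves to a parse/unparse round trip, a check the suite implies but does not state outright. Below is a hedged sketch using only APIs, property names, and file paths that appear elsewhere in this patch; whether unparse reproduces the original bytes for a given schema is an assumption to verify, not a guarantee:

    import com.tresys.nifi.processors.DaffodilParse;
    import com.tresys.nifi.processors.DaffodilUnparse;
    import com.tresys.nifi.util.DaffodilResources;
    import org.apache.nifi.util.MockFlowFile;
    import org.apache.nifi.util.TestRunner;
    import org.apache.nifi.util.TestRunners;

    public class RoundTripSketch {

        // Parses the input to a JSON infoset, unparses that infoset with the
        // same DFDL schema, and returns the resulting bytes for comparison
        // against the original input.
        static byte[] roundTrip(byte[] original, String schemaPath) {
            TestRunner parse = TestRunners.newTestRunner(DaffodilParse.class);
            parse.setProperty(DaffodilResources.DFDL_SCHEMA_FILE, schemaPath);
            parse.setProperty("infoset-type", DaffodilParse.JSON_VALUE);
            parse.enqueue(original);
            parse.run();
            parse.assertAllFlowFilesTransferred("success", 1);
            MockFlowFile infoset = parse.getFlowFilesForRelationship("success").get(0);

            TestRunner unparse = TestRunners.newTestRunner(DaffodilUnparse.class);
            unparse.setProperty(DaffodilResources.DFDL_SCHEMA_FILE, schemaPath);
            unparse.setProperty("infoset-type", DaffodilUnparse.JSON_VALUE);
            unparse.enqueue(infoset.toByteArray());
            unparse.run();
            unparse.assertAllFlowFilesTransferred("success", 1);
            return unparse.getFlowFilesForRelationship("success").get(0).toByteArray();
        }
    }

For example, roundTrip could be called with the bytes of TestDaffodilComponents/tokens.csv and the path to csv.dfdl.xsd, then asserted equal to the original file contents.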