From d2032da711a3ee14ddc499baccefe486072b6c51 Mon Sep 17 00:00:00 2001 From: Arun Manivannan Date: Sat, 12 Aug 2017 13:41:18 +0800 Subject: [PATCH] NIFI-4062 Provide an option to disable DTD validation for EvaluateXPath and EvaluateXQuery --- .../processors/standard/EvaluateXPath.java | 36 +++++++- .../processors/standard/EvaluateXQuery.java | 34 +++++++- .../standard/TestEvaluateXPath.java | 70 ++++++++++++++++ .../standard/TestEvaluateXQuery.java | 84 +++++++++++++++++-- .../TestXml/xml-snippet-embedded-doctype.xml | 33 ++++++++ .../TestXml/xml-snippet-external-doctype.xml | 26 ++++++ 6 files changed, 274 insertions(+), 9 deletions(-) create mode 100644 nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/resources/TestXml/xml-snippet-embedded-doctype.xml create mode 100644 nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/resources/TestXml/xml-snippet-external-doctype.xml diff --git a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/EvaluateXPath.java b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/EvaluateXPath.java index 4ff7e0e1a0c4..08690ba8854a 100644 --- a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/EvaluateXPath.java +++ b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/EvaluateXPath.java @@ -23,6 +23,7 @@ import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; +import java.io.StringReader; import java.io.UnsupportedEncodingException; import java.util.ArrayList; import java.util.Collection; @@ -43,6 +44,7 @@ import javax.xml.transform.TransformerException; import javax.xml.transform.TransformerFactory; import javax.xml.transform.TransformerFactoryConfigurationError; +import javax.xml.transform.sax.SAXSource; import 
javax.xml.transform.stream.StreamResult; import javax.xml.xpath.XPathExpression; import javax.xml.xpath.XPathExpressionException; @@ -75,10 +77,14 @@ import org.apache.nifi.processor.io.OutputStreamCallback; import org.apache.nifi.stream.io.BufferedInputStream; import org.apache.nifi.stream.io.BufferedOutputStream; +import org.xml.sax.EntityResolver; import org.xml.sax.InputSource; import net.sf.saxon.lib.NamespaceConstant; import net.sf.saxon.xpath.XPathEvaluator; +import org.xml.sax.SAXException; +import org.xml.sax.XMLReader; +import org.xml.sax.helpers.XMLReaderFactory; @EventDriven @SideEffectFree @@ -125,6 +131,14 @@ public class EvaluateXPath extends AbstractProcessor { .defaultValue(RETURN_TYPE_AUTO) .build(); + public static final PropertyDescriptor VALIDATE_DTD = new PropertyDescriptor.Builder() + .name("Validate DTD") + .description("Specifies whether or not the XML content should be validated against the DTD.") + .required(true) + .allowableValues("true", "false") + .defaultValue("true") + .build(); + public static final Relationship REL_MATCH = new Relationship.Builder() .name("matched") .description("FlowFiles are routed to this relationship " @@ -162,6 +176,7 @@ protected void init(final ProcessorInitializationContext context) { final List properties = new ArrayList<>(); properties.add(DESTINATION); properties.add(RETURN_TYPE); + properties.add(VALIDATE_DTD); this.properties = Collections.unmodifiableList(properties); } @@ -219,6 +234,24 @@ public void onTrigger(final ProcessContext context, final ProcessSession session } final ComponentLog logger = getLogger(); + final XMLReader xmlReader; + + try { + xmlReader = XMLReaderFactory.createXMLReader(); + } catch (SAXException e) { + logger.error("Error while constructing XMLReader {}", new Object[]{e}); + throw new ProcessException(e.getMessage(), e); /* chain the SAXException as the cause so the original stack trace is not lost */ + } + + if (!context.getProperty(VALIDATE_DTD).asBoolean()) { + xmlReader.setEntityResolver(new EntityResolver() { + @Override + public InputSource 
resolveEntity(String publicId, String systemId) throws SAXException, IOException { + return new InputSource(new StringReader("")); + } + }); + } + final XPathFactory factory = factoryRef.get(); final XPathEvaluator xpathEvaluator = (XPathEvaluator) factory.newXPath(); final Map attributeToXPathMap = new HashMap<>(); @@ -277,7 +310,8 @@ public void onTrigger(final ProcessContext context, final ProcessSession session @Override public void process(final InputStream rawIn) throws IOException { try (final InputStream in = new BufferedInputStream(rawIn)) { - final List rootList = (List) slashExpression.evaluate(new InputSource(in), NODESET); + final List rootList = (List) slashExpression.evaluate(new SAXSource(xmlReader, + new InputSource(in)), NODESET); sourceRef.set(rootList.get(0)); } catch (final Exception e) { error.set(e); diff --git a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/EvaluateXQuery.java b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/EvaluateXQuery.java index b8ff2eb0072f..cfbb48b32b5c 100644 --- a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/EvaluateXQuery.java +++ b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/EvaluateXQuery.java @@ -20,6 +20,7 @@ import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; +import java.io.StringReader; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; @@ -68,6 +69,7 @@ import org.apache.nifi.stream.io.BufferedInputStream; import org.apache.nifi.stream.io.BufferedOutputStream; import org.w3c.dom.Document; +import org.xml.sax.EntityResolver; import org.xml.sax.InputSource; import net.sf.saxon.s9api.DOMDestination; @@ -79,6 +81,9 @@ import net.sf.saxon.s9api.XdmItem; import 
net.sf.saxon.s9api.XdmNode; import net.sf.saxon.s9api.XdmValue; +import org.xml.sax.SAXException; +import org.xml.sax.XMLReader; +import org.xml.sax.helpers.XMLReaderFactory; @EventDriven @SideEffectFree @@ -148,6 +153,14 @@ public class EvaluateXQuery extends AbstractProcessor { .defaultValue("false") .build(); + public static final PropertyDescriptor VALIDATE_DTD = new PropertyDescriptor.Builder() + .name("Validate DTD") + .description("Specifies whether or not the XML content should be validated against the DTD.") + .required(true) + .allowableValues("true", "false") + .defaultValue("true") + .build(); + public static final Relationship REL_MATCH = new Relationship.Builder() .name("matched") .description("FlowFiles are routed to this relationship when the XQuery is successfully evaluated and the FlowFile " @@ -182,6 +195,7 @@ protected void init(final ProcessorInitializationContext context) { properties.add(XML_OUTPUT_METHOD); properties.add(XML_OUTPUT_OMIT_XML_DECLARATION); properties.add(XML_OUTPUT_INDENT); + properties.add(VALIDATE_DTD); this.properties = Collections.unmodifiableList(properties); } @@ -231,6 +245,24 @@ public void onTrigger(final ProcessContext context, final ProcessSession session final Map attributeToXQueryMap = new HashMap<>(); final Processor proc = new Processor(false); + final XMLReader xmlReader; + + try { + xmlReader = XMLReaderFactory.createXMLReader(); + } catch (SAXException e) { + logger.error("Error while constructing XMLReader {}", new Object[]{e}); + throw new ProcessException(e.getMessage(), e); /* chain the SAXException as the cause so the original stack trace is not lost */ + } + + if (!context.getProperty(VALIDATE_DTD).asBoolean()) { + xmlReader.setEntityResolver(new EntityResolver() { + @Override + public InputSource resolveEntity(String publicId, String systemId) throws SAXException, IOException { + return new InputSource(new StringReader("")); + } + }); + } + final XQueryCompiler comp = proc.newXQueryCompiler(); for (final Map.Entry entry : context.getProperties().entrySet()) { @@ -272,7 +304,7 @@ public 
void onTrigger(final ProcessContext context, final ProcessSession session public void process(final InputStream rawIn) throws IOException { try (final InputStream in = new BufferedInputStream(rawIn)) { XQueryEvaluator qe = slashExpression.load(); - qe.setSource(new SAXSource(new InputSource(in))); + qe.setSource(new SAXSource(xmlReader, new InputSource(in))); DocumentBuilderFactory dfactory = DocumentBuilderFactory.newInstance(); dfactory.setNamespaceAware(true); Document dom = dfactory.newDocumentBuilder().newDocument(); diff --git a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestEvaluateXPath.java b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestEvaluateXPath.java index 95e475f71fe3..98899bbe14d3 100644 --- a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestEvaluateXPath.java +++ b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestEvaluateXPath.java @@ -33,6 +33,8 @@ public class TestEvaluateXPath { private static final Path XML_SNIPPET = Paths.get("src/test/resources/TestXml/xml-snippet.xml"); + private static final Path XML_SNIPPET_EMBEDDED_DOCTYPE = Paths.get("src/test/resources/TestXml/xml-snippet-embedded-doctype.xml"); + private static final Path XML_SNIPPET_NONEXISTENT_DOCTYPE = Paths.get("src/test/resources/TestXml/xml-snippet-external-doctype.xml"); @Test public void testAsAttribute() throws XPathFactoryConfigurationException, IOException { @@ -155,4 +157,72 @@ public void testWriteNodeSetToAttribute() throws XPathFactoryConfigurationExcept assertTrue(outXml.contains("subNode")); assertTrue(outXml.contains("Hello")); } + + @Test + public void testSuccessForEmbeddedDocTypeValidation() throws XPathFactoryConfigurationException, IOException { + final TestRunner testRunner = 
TestRunners.newTestRunner(new EvaluateXPath()); + testRunner.setProperty(EvaluateXPath.DESTINATION, EvaluateXPath.DESTINATION_CONTENT); + testRunner.setProperty(EvaluateXPath.RETURN_TYPE, EvaluateXPath.RETURN_TYPE_STRING); + testRunner.setProperty(EvaluateXPath.VALIDATE_DTD, "true"); + testRunner.setProperty("some.property", "/*:bundle/node/subNode[1]/value/text()"); + + testRunner.enqueue(XML_SNIPPET_EMBEDDED_DOCTYPE); + testRunner.run(); + + testRunner.assertAllFlowFilesTransferred(EvaluateXPath.REL_MATCH, 1); + final MockFlowFile out = testRunner.getFlowFilesForRelationship(EvaluateXPath.REL_MATCH).get(0); + final byte[] outData = testRunner.getContentAsByteArray(out); + final String outXml = new String(outData, "UTF-8"); + assertTrue(outXml.trim().equals("Hello")); + } + + @Test + public void testSuccessForEmbeddedDocTypeValidationDisabled() throws XPathFactoryConfigurationException, IOException { + final TestRunner testRunner = TestRunners.newTestRunner(new EvaluateXPath()); + testRunner.setProperty(EvaluateXPath.DESTINATION, EvaluateXPath.DESTINATION_CONTENT); + testRunner.setProperty(EvaluateXPath.RETURN_TYPE, EvaluateXPath.RETURN_TYPE_STRING); + testRunner.setProperty(EvaluateXPath.VALIDATE_DTD, "false"); + testRunner.setProperty("some.property", "/*:bundle/node/subNode[1]/value/text()"); + + testRunner.enqueue(XML_SNIPPET_EMBEDDED_DOCTYPE); + testRunner.run(); + + testRunner.assertAllFlowFilesTransferred(EvaluateXPath.REL_MATCH, 1); + final MockFlowFile out = testRunner.getFlowFilesForRelationship(EvaluateXPath.REL_MATCH).get(0); + final byte[] outData = testRunner.getContentAsByteArray(out); + final String outXml = new String(outData, "UTF-8"); + assertTrue(outXml.trim().equals("Hello")); + } + + @Test + public void testFailureForExternalDocTypeWithDocTypeValidationEnabled() throws XPathFactoryConfigurationException, IOException { + final TestRunner testRunner = TestRunners.newTestRunner(new EvaluateXPath()); + 
testRunner.setProperty(EvaluateXPath.DESTINATION, EvaluateXPath.DESTINATION_CONTENT); + testRunner.setProperty(EvaluateXPath.RETURN_TYPE, EvaluateXPath.RETURN_TYPE_STRING); + testRunner.setProperty("some.property", "/*:bundle/node/subNode[1]/value/text()"); + + testRunner.enqueue(XML_SNIPPET_NONEXISTENT_DOCTYPE); + testRunner.run(); + + testRunner.assertAllFlowFilesTransferred(EvaluateXPath.REL_FAILURE, 1); + } + + @Test + public void testSuccessForExternalDocTypeWithDocTypeValidationDisabled() throws XPathFactoryConfigurationException, IOException { + final TestRunner testRunner = TestRunners.newTestRunner(new EvaluateXPath()); + testRunner.setProperty(EvaluateXPath.DESTINATION, EvaluateXPath.DESTINATION_CONTENT); + testRunner.setProperty(EvaluateXPath.RETURN_TYPE, EvaluateXPath.RETURN_TYPE_STRING); + testRunner.setProperty(EvaluateXPath.VALIDATE_DTD, "false"); + testRunner.setProperty("some.property", "/*:bundle/node/subNode[1]/value/text()"); + + testRunner.enqueue(XML_SNIPPET_NONEXISTENT_DOCTYPE); + testRunner.run(); + + testRunner.assertAllFlowFilesTransferred(EvaluateXPath.REL_MATCH, 1); + final MockFlowFile out = testRunner.getFlowFilesForRelationship(EvaluateXPath.REL_MATCH).get(0); + final byte[] outData = testRunner.getContentAsByteArray(out); + final String outXml = new String(outData, "UTF-8"); + assertTrue(outXml.trim().equals("Hello")); + } + } diff --git a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestEvaluateXQuery.java b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestEvaluateXQuery.java index aae441162ec5..05fe9c278f5c 100644 --- a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestEvaluateXQuery.java +++ b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestEvaluateXQuery.java @@ 
-37,13 +37,15 @@ import org.apache.nifi.util.MockFlowFile; import org.apache.nifi.util.TestRunner; import org.apache.nifi.util.TestRunners; -import org.junit.Ignore; import org.junit.Test; public class TestEvaluateXQuery { private static final Path XML_SNIPPET = Paths.get("src/test/resources/TestXml/fruit.xml"); + private static final Path XML_SNIPPET_EMBEDDED_DOCTYPE = Paths.get("src/test/resources/TestXml/xml-snippet-embedded-doctype.xml"); + private static final Path XML_SNIPPET_NONEXISTENT_DOCTYPE = Paths.get("src/test/resources/TestXml/xml-snippet-external-doctype.xml"); + private static final String[] fruitNames = {"apple", "apple", "banana", "orange", "blueberry", "raspberry", "none"}; private static final String[] methods = {EvaluateXQuery.OUTPUT_METHOD_XML, EvaluateXQuery.OUTPUT_METHOD_HTML, EvaluateXQuery.OUTPUT_METHOD_TEXT}; @@ -65,7 +67,6 @@ public void testSetTransformerProperties() throws Exception { } } - @Ignore("this test is failing") @Test public void testFormatting() throws Exception { @@ -102,7 +103,7 @@ public void testFormatting() throws Exception { + " apple\n" + " red\n" + " "; - assertEquals(expectedXml, formattedResults.get(0)); + assertEquals(spaceTrimmed(expectedXml), spaceTrimmed(formattedResults.get(0))); } { formattedResults = getFormattedResult(XML_SNIPPET, singleElementNodeQuery, "html", false, false); @@ -113,7 +114,7 @@ public void testFormatting() throws Exception { + " apple\n" + " red\n" + " "; - assertEquals(expectedXml, formattedResults.get(0)); + assertEquals(spaceTrimmed(expectedXml), spaceTrimmed(formattedResults.get(0))); } { formattedResults = getFormattedResult(XML_SNIPPET, singleElementNodeQuery, "text", false, false); @@ -123,7 +124,7 @@ public void testFormatting() throws Exception { + " apple\n" + " red\n" + " "; - assertEquals(expectedXml, formattedResults.get(0)); + assertEquals(spaceTrimmed(expectedXml), spaceTrimmed(formattedResults.get(0))); } { formattedResults = getFormattedResult(XML_SNIPPET, 
singleElementNodeQuery, "xml", true, false); @@ -135,7 +136,7 @@ public void testFormatting() throws Exception { + " apple\n" + " red\n" + " \n"; - assertEquals(expectedXml, formattedResults.get(0)); + assertEquals(spaceTrimmed(expectedXml), spaceTrimmed(formattedResults.get(0))); } { formattedResults = getFormattedResult(XML_SNIPPET, singleElementNodeQuery, "xml", true, true); @@ -146,10 +147,14 @@ public void testFormatting() throws Exception { + " apple\n" + " red\n" + " \n"; - assertEquals(expectedXml, formattedResults.get(0)); + assertEquals(spaceTrimmed(expectedXml), spaceTrimmed(formattedResults.get(0))); } } + private String spaceTrimmed(String str) { + return Arrays.stream(str.split("\n")).map(String :: trim).reduce("", String :: concat); + } + private List getFormattedResult(Path xml, final String xQuery, final String method, final boolean indent, final boolean omitDeclaration) throws Exception { Map runnerProps = new HashMap<>(); @@ -648,4 +653,69 @@ public void testMatchesMultipleXmlAttribute() throws XPathFactoryConfigurationEx } testRunner.getFlowFilesForRelationship(EvaluateXQuery.REL_MATCH).get(0).assertContentEquals(XML_SNIPPET); } + + @Test + public void testSuccessForEmbeddedDocTypeValidation() throws XPathFactoryConfigurationException, IOException { + final TestRunner testRunner = TestRunners.newTestRunner(new EvaluateXQuery()); + testRunner.setProperty(EvaluateXQuery.DESTINATION, EvaluateXQuery.DESTINATION_CONTENT); + testRunner.setProperty(EvaluateXQuery.VALIDATE_DTD, "true"); + testRunner.setProperty("some.property", "/*:bundle/node/subNode[1]/value/text()"); + + testRunner.enqueue(XML_SNIPPET_EMBEDDED_DOCTYPE); + testRunner.run(); + + testRunner.assertAllFlowFilesTransferred(EvaluateXQuery.REL_MATCH, 1); + final MockFlowFile out = testRunner.getFlowFilesForRelationship(EvaluateXQuery.REL_MATCH).get(0); + final byte[] outData = testRunner.getContentAsByteArray(out); + final String outXml = new String(outData, "UTF-8"); + 
assertTrue(outXml.trim().equals("Hello")); + } + + @Test + public void testSuccessForEmbeddedDocTypeValidationDisabled() throws XPathFactoryConfigurationException, IOException { + final TestRunner testRunner = TestRunners.newTestRunner(new EvaluateXQuery()); + testRunner.setProperty(EvaluateXQuery.DESTINATION, EvaluateXQuery.DESTINATION_CONTENT); + testRunner.setProperty(EvaluateXQuery.VALIDATE_DTD, "false"); + testRunner.setProperty("some.property", "/*:bundle/node/subNode[1]/value/text()"); + + testRunner.enqueue(XML_SNIPPET_EMBEDDED_DOCTYPE); + testRunner.run(); + + testRunner.assertAllFlowFilesTransferred(EvaluateXQuery.REL_MATCH, 1); + final MockFlowFile out = testRunner.getFlowFilesForRelationship(EvaluateXQuery.REL_MATCH).get(0); + final byte[] outData = testRunner.getContentAsByteArray(out); + final String outXml = new String(outData, "UTF-8"); + assertTrue(outXml.trim().equals("Hello")); + } + + + @Test + public void testFailureForExternalDocTypeWithDocTypeValidationEnabled() throws XPathFactoryConfigurationException, IOException { + final TestRunner testRunner = TestRunners.newTestRunner(new EvaluateXQuery()); + testRunner.setProperty(EvaluateXQuery.DESTINATION, EvaluateXQuery.DESTINATION_CONTENT); + testRunner.setProperty("some.property", "/*:bundle/node/subNode[1]/value/text()"); + + testRunner.enqueue(XML_SNIPPET_NONEXISTENT_DOCTYPE); + testRunner.run(); + + testRunner.assertAllFlowFilesTransferred(EvaluateXQuery.REL_FAILURE, 1); + } + + + @Test + public void testSuccessForExternalDocTypeWithDocTypeValidationDisabled() throws XPathFactoryConfigurationException, IOException { + final TestRunner testRunner = TestRunners.newTestRunner(new EvaluateXQuery()); + testRunner.setProperty(EvaluateXQuery.DESTINATION, EvaluateXQuery.DESTINATION_CONTENT); + testRunner.setProperty(EvaluateXQuery.VALIDATE_DTD, "false"); + testRunner.setProperty("some.property", "/*:bundle/node/subNode[1]/value/text()"); + + testRunner.enqueue(XML_SNIPPET_NONEXISTENT_DOCTYPE); + 
testRunner.run(); + + testRunner.assertAllFlowFilesTransferred(EvaluateXQuery.REL_MATCH, 1); + final MockFlowFile out = testRunner.getFlowFilesForRelationship(EvaluateXQuery.REL_MATCH).get(0); + final byte[] outData = testRunner.getContentAsByteArray(out); + final String outXml = new String(outData, "UTF-8"); + assertTrue(outXml.trim().equals("Hello")); + } } diff --git a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/resources/TestXml/xml-snippet-embedded-doctype.xml b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/resources/TestXml/xml-snippet-embedded-doctype.xml new file mode 100644 index 000000000000..e5de35d7ec75 --- /dev/null +++ b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/resources/TestXml/xml-snippet-embedded-doctype.xml @@ -0,0 +1,33 @@ + + + + + +]> + + + + + + Hello + + + World! + + + \ No newline at end of file diff --git a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/resources/TestXml/xml-snippet-external-doctype.xml b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/resources/TestXml/xml-snippet-external-doctype.xml new file mode 100644 index 000000000000..e9e3020524ba --- /dev/null +++ b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/resources/TestXml/xml-snippet-external-doctype.xml @@ -0,0 +1,26 @@ + + + + + + + Hello + + + World! + + + \ No newline at end of file