From e15697f42eb103fb393ab391f53bcd1d7ebd4d38 Mon Sep 17 00:00:00 2001 From: Aled Sage Date: Wed, 22 Jun 2016 13:24:01 +0100 Subject: [PATCH 1/3] Improve/test performance of XmlUtil.xpath --- .../brooklyn/util/core/xstream/XmlUtil.java | 24 +++++++-- .../qa/performance/XmlPerformanceTest.java | 53 +++++++++++++++++++ .../util/core/xstream/XmlUtilTest.java | 5 +- 3 files changed, 77 insertions(+), 5 deletions(-) create mode 100644 core/src/test/java/org/apache/brooklyn/core/test/qa/performance/XmlPerformanceTest.java diff --git a/core/src/main/java/org/apache/brooklyn/util/core/xstream/XmlUtil.java b/core/src/main/java/org/apache/brooklyn/util/core/xstream/XmlUtil.java index 0619811b93..a969dea589 100644 --- a/core/src/main/java/org/apache/brooklyn/util/core/xstream/XmlUtil.java +++ b/core/src/main/java/org/apache/brooklyn/util/core/xstream/XmlUtil.java @@ -34,11 +34,29 @@ public class XmlUtil { + /** + * Thread-local storage for sharing the {@link DocumentBuilder}, to avoid repeated construction. + * See {@linkplain http://stackoverflow.com/questions/9828254/is-documentbuilderfactory-thread-safe-in-java-5}. + */ + private static class SharedDocumentBuilder { + private static ThreadLocal instance = new ThreadLocal(); + + public static DocumentBuilder get() throws ParserConfigurationException { + DocumentBuilder result = instance.get(); + if (result == null) { + DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); + result = factory.newDocumentBuilder(); + instance.set(result); + } else { + result.reset(); + } + return result; + } + } + public static Object xpath(String xml, String xpath) { - // TODO Could share factory/doc in thread-local storage; see http://stackoverflow.com/questions/9828254/is-documentbuilderfactory-thread-safe-in-java-5 try { - DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); - DocumentBuilder builder = factory.newDocumentBuilder(); + DocumentBuilder builder = SharedDocumentBuilder.get(); Document doc = builder.parse(new ByteArrayInputStream(xml.getBytes())); XPathFactory xPathfactory = XPathFactory.newInstance(); XPathExpression expr = xPathfactory.newXPath().compile(xpath); diff --git a/core/src/test/java/org/apache/brooklyn/core/test/qa/performance/XmlPerformanceTest.java b/core/src/test/java/org/apache/brooklyn/core/test/qa/performance/XmlPerformanceTest.java new file mode 100644 index 0000000000..8658a7c542 --- /dev/null +++ b/core/src/test/java/org/apache/brooklyn/core/test/qa/performance/XmlPerformanceTest.java @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.brooklyn.core.test.qa.performance; + +import org.apache.brooklyn.test.performance.PerformanceTestDescriptor; +import org.apache.brooklyn.util.core.xstream.XmlUtil; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +public class XmlPerformanceTest extends AbstractPerformanceTest { + + @BeforeMethod(alwaysRun=true) + @Override + public void setUp() throws Exception { + super.setUp(); + } + + protected int numIterations() { + return 100; + } + + @Test(groups = { "Integration", "Acceptance" }) + public void testXpath() throws Exception { + int numIterations = numIterations(); + double minRatePerSec = 100 * PERFORMANCE_EXPECTATION; + + measure(PerformanceTestDescriptor.create() + .summary("XmlPerformanceTest.testXpath") + .iterations(numIterations) + .minAcceptablePerSecond(minRatePerSec) + .job(new Runnable() { + public void run() { + String xml = "myb"; + XmlUtil.xpath(xml, "/a/b[text()]"); + }})); + } +} diff --git a/core/src/test/java/org/apache/brooklyn/util/core/xstream/XmlUtilTest.java b/core/src/test/java/org/apache/brooklyn/util/core/xstream/XmlUtilTest.java index 3391862b43..65983e3a44 100644 --- a/core/src/test/java/org/apache/brooklyn/util/core/xstream/XmlUtilTest.java +++ b/core/src/test/java/org/apache/brooklyn/util/core/xstream/XmlUtilTest.java @@ -20,7 +20,6 @@ import static org.testng.Assert.assertEquals; -import org.apache.brooklyn.util.core.xstream.XmlUtil; import org.testng.annotations.Test; @@ -29,6 +28,8 @@ public class XmlUtilTest { @Test public void testXpath() throws Exception { String xml = "myb"; - assertEquals(XmlUtil.xpath(xml, "/a/b[text()]"), "myb"); + for (int i = 0; i < 2; i++) { + assertEquals(XmlUtil.xpath(xml, "/a/b[text()]"), "myb"); + } } } From 7981326612d20a0d6bd9b7d972620f5e2e0b4fc3 Mon Sep 17 00:00:00 2001 From: Aled Sage Date: Wed, 22 Jun 2016 13:24:30 +0100 Subject: [PATCH 2/3] Adds XmlSerializerTest --- .../util/core/xstream/XmlSerializerTest.java | 77 +++++++++++++++++++ 1 file changed, 77 insertions(+) create mode 100644 core/src/test/java/org/apache/brooklyn/util/core/xstream/XmlSerializerTest.java diff --git a/core/src/test/java/org/apache/brooklyn/util/core/xstream/XmlSerializerTest.java b/core/src/test/java/org/apache/brooklyn/util/core/xstream/XmlSerializerTest.java new file mode 100644 index 0000000000..f89e770956 --- /dev/null +++ b/core/src/test/java/org/apache/brooklyn/util/core/xstream/XmlSerializerTest.java @@ -0,0 +1,77 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.brooklyn.util.core.xstream; + +import static org.testng.Assert.assertEquals; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +public class XmlSerializerTest { + + private static final Logger LOG = LoggerFactory.getLogger(XmlSerializerTest.class); + + protected XmlSerializer serializer; + + @BeforeMethod(alwaysRun=true) + private void setUp() { + serializer = new XmlSerializer(); + } + + @Test + public void testSimple() throws Exception { + assertSerializeAndDeserialize("abc"); + assertSerializeAndDeserialize(1); + assertSerializeAndDeserialize(true); + assertSerializeAndDeserialize(new StringHolder("abc")); + } + + @Test + public void testIllegalXmlCharacter() throws Exception { + String val = "abc\u001b"; + assertEquals((int)val.charAt(3), 27); // expect that to give us unicode character 27 + assertSerializeAndDeserialize(val); + assertSerializeAndDeserialize(new StringHolder(val)); + } + + protected void assertSerializeAndDeserialize(Object val) throws Exception { + String xml = serializer.toString(val); + Object result = serializer.fromString(xml); + LOG.info("val="+val+"'; xml="+xml+"; result="+result); + assertEquals(result, val); + } + + public static class StringHolder { + public String val; + + StringHolder(String val) { + this.val = val; + } + @Override + public boolean equals(Object obj) { + return (obj instanceof StringHolder) && val.equals(((StringHolder)obj).val); + } + @Override + public int hashCode() { + return val.hashCode(); + } + } +} From 0ef20baf2af093373b97dd0e56566089bfa53bdd Mon Sep 17 00:00:00 2001 From: Aled Sage Date: Wed, 22 Jun 2016 15:09:02 +0100 Subject: [PATCH 3/3] BROOKLYN-305: Handle invalid xml chars in attribute vals --- ...BrooklynMementoPersisterToObjectStore.java | 4 +- .../brooklyn/util/core/xstream/XmlUtil.java | 56 +++++++ .../core/mgmt/rebind/RebindEntityTest.java | 20 +++ .../util/core/xstream/XmlSerializerTest.java | 8 +- .../util/core/xstream/XmlUtilTest.java | 147 ++++++++++++++++++ 5 files changed, 231 insertions(+), 4 deletions(-) diff --git a/core/src/main/java/org/apache/brooklyn/core/mgmt/persist/BrooklynMementoPersisterToObjectStore.java b/core/src/main/java/org/apache/brooklyn/core/mgmt/persist/BrooklynMementoPersisterToObjectStore.java index e449529618..cce552db71 100644 --- a/core/src/main/java/org/apache/brooklyn/core/mgmt/persist/BrooklynMementoPersisterToObjectStore.java +++ b/core/src/main/java/org/apache/brooklyn/core/mgmt/persist/BrooklynMementoPersisterToObjectStore.java @@ -294,7 +294,7 @@ public void visit(BrooklynObjectType type, String id, String contentsSubpath) th exceptionHandler.onLoadMementoFailed(type, "memento "+id+" read error", e); } - String xmlId = (String) XmlUtil.xpath(contents, "/"+type.toCamelCase()+"/id"); + String xmlId = (String) XmlUtil.xpathHandlingIllegalChars(contents, "/"+type.toCamelCase()+"/id"); String safeXmlId = Strings.makeValidFilename(xmlId); if (!Objects.equal(id, safeXmlId)) LOG.warn("ID mismatch on "+type.toCamelCase()+", "+id+" from path, "+safeXmlId+" from xml"); @@ -334,7 +334,7 @@ public void visit(BrooklynObjectType type, String objectId, final String content class XPathHelper { private String get(String innerPath) { - return (String) XmlUtil.xpath(contents, prefix+innerPath); + return (String) XmlUtil.xpathHandlingIllegalChars(contents, prefix+innerPath); } } XPathHelper x = new XPathHelper(); diff --git a/core/src/main/java/org/apache/brooklyn/util/core/xstream/XmlUtil.java b/core/src/main/java/org/apache/brooklyn/util/core/xstream/XmlUtil.java index a969dea589..9f914fa4f4 100644 --- a/core/src/main/java/org/apache/brooklyn/util/core/xstream/XmlUtil.java +++ b/core/src/main/java/org/apache/brooklyn/util/core/xstream/XmlUtil.java @@ -32,6 +32,8 @@ import org.w3c.dom.Document; import org.xml.sax.SAXException; +import com.google.common.annotations.Beta; + public class XmlUtil { /** @@ -73,4 +75,58 @@ public static Object xpath(String xml, String xpath) { throw Exceptions.propagate(e); } } + + /** + * Executes the given xpath on the given xml. If this fails becaues the xml is invalid + * (e.g. contains ""), then it will attempt to escape such illegal characters + * and try again. Note that the *escaped* values may be contained in the returned result! + * The escaping used is the prefix "BR_UNICODE_"; if that string is already in the xml, + * then it will replace that with "NOT_BR_UNICODE_". + */ + @Beta + public static Object xpathHandlingIllegalChars(String xml, String xpath) { + try { + return xpath(xml, xpath); + } catch (Exception e) { + SAXException saxe = Exceptions.getFirstThrowableOfType(e, SAXException.class); + if (saxe != null && saxe.toString().contains("&#")) { + // Looks like illegal chars (e.g. xstream converts unicode char 27 to "", + // which is not valid in XML! Try again with an escaped xml. + Escaper escaper = new Escaper(); + String xmlCleaned = escaper.escape(xml); + try { + Object result = xpath(xmlCleaned, xpath); + if (result instanceof String) { + return escaper.unescape((String)result); + } else { + return result; + } + } catch (Exception e2) { + Exceptions.propagateIfFatal(e2); + } + } + throw e; + } + } + + /** + * Replaces things like "" with "BR_UNICODE_x1b". This is because xstream happily writes + * out such characters (which are not valid in xml), but xpath fails when parsing them. + */ + @Beta + protected static class Escaper { + + public String escape(String string) { + String unicodeRegex = "&#([x0-9a-fA-f]{1,5});"; + return string.replaceAll("BR_UNICODE_", "NOT_BR_UNICODE_") + .replaceAll(unicodeRegex, "BR_UNICODE_$1;"); + } + + public String unescape(String string) { + String unicodeRegex = "(?myb"; @@ -32,4 +55,128 @@ public void testXpath() throws Exception { assertEquals(XmlUtil.xpath(xml, "/a/b[text()]"), "myb"); } } + + @Test + public void testXpathWithEscapedCharsAndXmlVersion1_1() throws Exception { + StringBuilder xml = new StringBuilder(""+"\n"+ + "myb"); + for (int i = 0; i < Integer.valueOf("FFFF", 16); i++) { + if (isValidUnicodeInXml1_1(i)) { + String unicode = Integer.toHexString(i); + xml.append("&#x"+unicode+";"); + } + } + xml.append(""); + assertEquals(XmlUtil.xpath(xml.toString(), "/a/b[text()]"), "myb"); + } + + @Test + public void testXpathWithEscapedCharsAndXmlUnversioned() throws Exception { + StringBuilder xml = new StringBuilder("myb"); + for (int i = 0; i < Integer.valueOf("FFFF", 16); i++) { + if (isValidUnicodeInXml1_0(i)) { + String unicode = Integer.toHexString(i); + xml.append("&#x"+unicode+";"); + } + } + xml.append(""); + assertEquals(XmlUtil.xpath(xml.toString(), "/a/b[text()]"), "myb"); + } + + @Test + public void testXpathHandlingIllegalChars() throws Exception { + StringBuilder xml = new StringBuilder("myb"); + for (int i = 0; i < Integer.valueOf("FFFF", 16); i++) { + String unicode = Integer.toHexString(i); + xml.append("&#x"+unicode+";"); + } + xml.append(""); + assertEquals(XmlUtil.xpathHandlingIllegalChars(xml.toString(), "/a/b[text()]"), "myb"); + } + + @Test + public void testEscaper() throws Exception { + // Escapes unicode char, ignoring things around it + assertEscapeAndUnescape("", "BR_UNICODE_x001b;"); + assertEscapeAndUnescape("", "BR_UNICODE_00027;"); + assertEscapeAndUnescape("", "BR_UNICODE_x1b;"); + assertEscapeAndUnescape("SUFFIX", "BR_UNICODE_x1b;SUFFIX"); + assertEscapeAndUnescape("PREFIX", "PREFIXBR_UNICODE_x1b;"); + assertEscapeAndUnescape("PREFIXSUFFIX", "PREFIXBR_UNICODE_x1b;SUFFIX"); + + // Ignores invalid unicodes + assertEscapeAndUnescape("", ""); // no ";" + assertEscapeAndUnescape("g;", "g;"); // chars out of range + assertEscapeAndUnescape("ƻ", "ƻ"); // too many chars + + // Handles strings that already contain the expected escape sequence + assertEscapeAndUnescape("BR_UNICODE_x1b;", "NOT_BR_UNICODE_x1b;"); + assertEscapeAndUnescape("NOT_BR_UNICODE_x1b;", "NOT_NOT_BR_UNICODE_x1b;"); + assertEscapeAndUnescape("BR_UNICODE_x1b;THEN", "NOT_BR_UNICODE_x1b;THENBR_UNICODE_x1b;"); + } + + protected void assertEscapeAndUnescape(String val) { + assertEscapeAndUnescape(val, Optional.absent()); + } + + protected void assertEscapeAndUnescape(String val, String expectedEscaped) { + assertEscapeAndUnescape(val, Optional.of(expectedEscaped)); + } + + protected void assertEscapeAndUnescape(String val, Optional expectedEscaped) { + Escaper escaper = new XmlUtil.Escaper(); + String escaped = escaper.escape(val); + if (expectedEscaped.isPresent()) { + assertEquals(escaped, expectedEscaped.get()); + } + assertEquals(escaper.unescape(escaped), val); + } + + @Beta + protected boolean isValidUnicodeInXml1_1(int c) { + int min = 0; + int max = 65535; // xFFFF + return min <= c && c <= max && + c != 0 && (c <= 55295 || c >= 57344) && c != 65534; + } + + @Beta + protected boolean isValidUnicodeInXml1_0(int c) { + // see https://www.w3.org/TR/xml/#charsets + return c == 0x9 || c == 0xA || c == 0xD || (0x20 <= c && c <= 0xD7FF) + || (0xE000 <= c && c <= 0xFFFD) || (0x10000 <= c && c <= 0x10FFFF); + } + + // For checking our assumptions about what is a valid and invalid (escaped) unicode char + // in XML version 1.0 and 1.1. + @Test(groups={"WIP"}, enabled=false) + public void testXpathWithEachEscapeCharacterAndXmlVersion() throws Exception { + List errsInXml1_1 = Lists.newArrayList(); + List errsInXmlUnversioned = Lists.newArrayList(); + for (int i = 0; i < Integer.valueOf("FFFF", 16); i++) { + String unicode = Integer.toHexString(i); + try { + String xml = Joiner.on("\n").join( + "", + "myb&#x"+unicode+";"); + assertEquals(XmlUtil.xpath(xml, "/a/b[text()]"), "myb"); + assertTrue(isValidUnicodeInXml1_1(i), "i="+i+"; unicode="+unicode); + } catch (Throwable t) { + LOG.debug("Failed for code "+unicode+": "+t.getMessage()); + errsInXml1_1.add(Integer.valueOf(unicode, 16)); + assertFalse(isValidUnicodeInXml1_1(i), "i="+i+"; unicode="+unicode); + } + try { + String xml = "myb&#x"+unicode+";"; + assertEquals(XmlUtil.xpath(xml, "/a/b[text()]"), "myb"); + assertTrue(isValidUnicodeInXml1_0(i), "i="+i+"; unicode="+unicode); + } catch (Throwable t) { + LOG.debug("Failed for code "+unicode+": "+t.getMessage()); + errsInXmlUnversioned.add(Integer.valueOf(unicode, 16)); + assertFalse(isValidUnicodeInXml1_0(i), "i="+i+"; unicode="+unicode); + } + } + LOG.info("XML version 1.1 invalidCodes="+errsInXml1_1); + LOG.info("XML unversioned invalidCodes="+errsInXmlUnversioned); + } }