Browse files

Implement new StAX-based reader as a new JDOM input source.

Tatu Saloranta submitted a reader before, but this implementation is
significantly different from that because, in general, the JDOM input
mechanisms do not do any formatting/restructuring as that is done by the
outputters. Thus, this implementation is a very 'thin' one. This
implementation also adds support for sourcing from an XMLEventReader, as
well as supporting the DTD event.
With the code come the tests.
  • Loading branch information...
1 parent 44bb5f3 commit 59ee0d5384843d841c9a1f5fe8dd3b8fda2c8524 @rolfl rolfl committed Oct 31, 2011
View
423 core/src/java/org/jdom2/input/DTDParser.java
@@ -0,0 +1,423 @@
+/*--
+
+ Copyright (C) 2000-2011 Jason Hunter & Brett McLaughlin.
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ 1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions, and the disclaimer that follows
+ these conditions in the documentation and/or other materials
+ provided with the distribution.
+
+ 3. The name "JDOM" must not be used to endorse or promote products
+ derived from this software without prior written permission. For
+ written permission, please contact <request_AT_jdom_DOT_org>.
+
+ 4. Products derived from this software may not be called "JDOM", nor
+ may "JDOM" appear in their name, without prior written permission
+ from the JDOM Project Management <request_AT_jdom_DOT_org>.
+
+ In addition, we request (but do not require) that you include in the
+ end-user documentation provided with the redistribution and/or in the
+ software itself an acknowledgement equivalent to the following:
+ "This product includes software developed by the
+ JDOM Project (http://www.jdom.org/)."
+ Alternatively, the acknowledgment may be graphical using the logos
+ available at http://www.jdom.org/images/logos.
+
+ THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED. IN NO EVENT SHALL THE JDOM AUTHORS OR THE PROJECT
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ SUCH DAMAGE.
+
+ This software consists of voluntary contributions made by many
+ individuals on behalf of the JDOM Project and was originally
+ created by Jason Hunter <jhunter_AT_jdom_DOT_org> and
+ Brett McLaughlin <brett_AT_jdom_DOT_org>. For more information
+ on the JDOM Project, please see <http://www.jdom.org/>.
+
+ */
+
+package org.jdom2.input;
+
+import java.util.HashMap;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.jdom2.DocType;
+import org.jdom2.JDOMException;
+import org.jdom2.JDOMFactory;
+
+/**
+ * In StAX Reader, the DocType is available as a single string.
+ * We need to harvest some data from it, as well as reformat it only to build
+ * a standardized DocType instance.
+ * <p>
+ * The assumption is that the DTD is valid.
+ * <p>
+ * We need to pull out 4 elements of data:
+ * <ol>
+ * <li>The root element name
+ * <li>The SystemID (if available)
+ * <li>The PublicID (if available)
+ * <li>The internal subset (if available)
+ * </ol>
+ *
+ * The internal-subset should be re-formatted to conform to the JDOM 'standard'
+ * where each declaration starts on a new line indented with 2 spaces. This
+ * 'standard' is defined by the way the JDOM formats the DTD declarations in the
+ * SAX parse process, which fires individual events for the content in the DTD.
+ * <p>
+ * We can do this all with a well-structured regular expression, which is
+ * actually simpler than trying to fish out all the components ourselves....
+ * <p>
+ *
+ * @author Rolf Lear
+ *
+ */
+public class DTDParser {
+
+ /*
+ * =======================================================================
+ *
+ * READ THIS...
+ *
+ *
+ * This code works by using a reg-ex to parse a valid DTD document.
+ * The pattern is complicated (not as complicated as an actual parser).
+ *
+ * Because the pattern is complicated this code creates a pattern 'database'
+ * and then 'pulls' patterns from the database to create the final regex. The
+ * database patterns are pulled to transform a pattern template in to a
+ * final regular expression. This template is called the 'meta-pattern'
+ *
+ * So, the pattern is not kept in its final form, but rather it is built
+ * up at class initialization time based on the meta-pattern, and the
+ * pattern database in the map.
+ *
+ * This is the final pattern: (broken over a few lines)
+ *
+ * [\s\r\n\t]*<!DOCTYPE[\s\r\n\t]+([^\s\r\n\t\[>]+)([\s\r\n\t]+
+ * ((SYSTEM[\s\r\n\t]+(('([^']*)')|("([^"]*)")))|
+ * (PUBLIC[\s\r\n\t]+(('([^']*)')|("([^"]*)"))([\s\r\n\t]+
+ * (('([^']*)')|("([^"]*)")))?)))?([\s\r\n\t]*\[(.*)\])?
+ * [\s\r\n\t]*>[\s\r\n\t]*
+ *
+ * You will agree that it's simpler to build the pattern than read it....
+ *
+ * With the above in mind, you can easily follow the way the pattern is
+ * built as it is simply a repeating use of some of the base constructs.
+ * =======================================================================
+ */
+
+ /**
+ * This is the meta-pattern.
+ * <p>
+ * <ul>
+ * <li>Where you see ' os ' there is optional space.
+ * <li>Where you see ' name ' there is the element name.
+ * <li>Where you see ' ms ' there is mandatory space.
+ * <li>Where you see ' id ' there is some quoted identifier.
+ * <li>Where you see ' internal ' there is the internal subset.
+ * </ul>
+ * Anything else will become part of the final regex.
+ * <p>
+ * Space ('&nbsp;') was chosen for the token delimiter because it
+ * makes the meta-pattern easy to read. There are a couple of places in
+ * this expression where there are two ' ' together, and it is critical
+ * that it does not change because there will be missed token matches then.
+ */
+ private static final String metapattern =
+         // The lead-in and the Element name.
+         " os <!DOCTYPE ms ( name )" +
+         // The Public/System references, if any.
+         // Tokens are recognised as <space>word<space>, and a match consumes
+         // both spaces. The space that follows ' ms ' is therefore used up, so
+         // ' id ' needs a leading space of its own: the doubled spaces below
+         // are deliberate and must be preserved, otherwise 'id' is left in the
+         // regex as literal text.
+         "( ms ((SYSTEM ms  id )|(PUBLIC ms  id ( ms  id )?)))?" +
+         // The Internal Subset, if any.
+         "( os \\[( internal )\\])?" +
+         // The lead-out.
+         " os > os ";
+
+ /**
+ * This builds a substitution map containing the raw patterns for
+ * certain types of content we expect.
+ * @return The populated map.
+ */
+ private static final HashMap<String,String> populatePatterns() {
+     final HashMap<String,String> tokens = new HashMap<String, String>();
+
+     // XML whitespace is defined as: S ::= (#x20 | #x9 | #xD | #xA)+
+     final String whitespace = "[ \\n\\r\\t]";
+     tokens.put("os", whitespace + "*"); // optional whitespace.
+     tokens.put("ms", whitespace + "+"); // mandatory whitespace.
+
+     // The doctype is assumed to be valid, so the element name is described
+     // by what it is NOT: it runs until white-space, '[' or '>'.
+     tokens.put("name", "[^ \\n\\r\\t\\[>]+"); // element name.
+
+     // A quoted id is 'anything-but-apostrophe' or "anything-but-quote".
+     // The parentheses are needed both for the alternation and so that the
+     // unquoted value lands in a capture group of its own.
+     tokens.put("id", "(('([^']*)')|(\"([^\"]*)\"))"); // quoted id.
+
+     // The internal subset is taken as-is; the surrounding [ ] are part of
+     // the meta-pattern rather than of this token.
+     tokens.put("internal", ".*"); // internal subset.
+
+     return tokens;
+ }
+
+ /**
+ * This method substitutes the simple tokens in the meta-pattern with
+ * the declared values in the map.
+ * @param map The map containing substitution tokens/patterns
+ * @param input The meta-pattern to do the substitutions on.
+ * @return The substituted pattern
+ */
+ private static final Pattern buildPattern(
+         HashMap<String,String> map, String input) {
+     // Each token is a word with a single space on either side. Space was
+     // chosen as the delimiter because it keeps the meta-pattern readable.
+     final Pattern search = Pattern.compile(" (\\w+) ");
+     final Matcher mat = search.matcher(input);
+     final StringBuilder sb = new StringBuilder();
+     int pos = 0;
+     while (mat.find()) {
+         final String rep = map.get(mat.group(1));
+         if (rep == null) {
+             // Fail fast: appending the text "null" would silently produce a
+             // regex that compiles but matches the wrong thing.
+             throw new IllegalArgumentException(
+                     "No definition of token '" + mat.group(1) + "'.");
+         }
+         // Matcher.appendReplacement() is not usable here because it gives
+         // special meaning to '\' in the replacement text, and the token
+         // patterns contain back-slashes. Copy the pieces by hand instead.
+         sb.append(input, pos, mat.start());
+         sb.append(rep);
+         pos = mat.end();
+     }
+     // whatever follows the last token is copied through unchanged.
+     sb.append(input, pos, input.length());
+     // DOTALL so that '.' (used for the internal subset) spans line breaks.
+     return Pattern.compile(sb.toString(), Pattern.DOTALL);
+ }
+
+ /**
+ * The following Pattern is the final result after
+ * parsing/tokenizing/substituting the meta-pattern.
+ */
+ private static final Pattern pattern =
+ // built once, when the class is initialized, by substituting the token
+ // patterns from populatePatterns() into the meta-pattern.
+ buildPattern(populatePatterns(), metapattern);
+
+ /*
+ * This pattern relies on pattern grouping to easily pull the values from
+ * the Matcher. Look at the following to get an idea of the groups that
+ * come from the reg-ex
+ *
+ * 0 -> <!DOCTYPE root SYSTEM "system" [internal] >
+ * 1 -> root
+ * 2 -> SYSTEM "system"
+ * 3 -> SYSTEM "system"
+ * 4 -> SYSTEM "system"
+ * 5 -> "system"
+ * 6 -> null
+ * 7 -> null
+ * 8 -> "system"
+ * 9 -> system
+ * 10 -> null
+ * 11 -> null
+ * 12 -> null
+ * 13 -> null
+ * 14 -> null
+ * 15 -> null
+ * 16 -> null
+ * 17 -> null
+ * 18 -> null
+ * 19 -> null
+ * 20 -> null
+ * 21 -> null
+ * 22 -> [internal]
+ * 23 -> internal
+ *
+ *
+ * 0 -> <!DOCTYPE root PUBLIC 'public' 'system' [internal] >
+ * 1 -> root
+ * 2 -> PUBLIC 'public' 'system'
+ * 3 -> PUBLIC 'public' 'system'
+ * 4 -> null
+ * 5 -> null
+ * 6 -> null
+ * 7 -> null
+ * 8 -> null
+ * 9 -> null
+ * 10 -> PUBLIC 'public' 'system'
+ * 11 -> 'public'
+ * 12 -> 'public'
+ * 13 -> public
+ * 14 -> null
+ * 15 -> null
+ * 16 -> 'system'
+ * 17 -> 'system'
+ * 18 -> 'system'
+ * 19 -> system
+ * 20 -> null
+ * 21 -> null
+ * 22 -> [internal]
+ * 23 -> internal
+ *
+ *
+ */
+
+ /**
+ * Looks in any number of matched groups for a value. Returns the first set
+ * value. The assumption is that, depending on the pattern matches, the
+ * value could be in a few different locations.
+ * @param mat The match that has succeeded
+ * @param groups The groups to check for a value.
+ * @return The first found value.
+ */
+ private static final String getGroup(final Matcher mat, final int...groups) {
+ for (final int g : groups) {
+ final String s = mat.group(g);
+ if (s != null) {
+ return s;
+ }
+ }
+ return null;
+ }
+
+ /**
+ * return true if the input character is one of the types recognized in the
+ * DTD spec.
+ * @param ch The char to check
+ * @return true if it is a space, tab, newline, or carriage-return.
+ */
+ private static final boolean isWhite(char ch) {
+ return ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r';
+ }
+
+ /**
+ * Reformat an internal subset.... Each declaration starts on an indented
+ * newline.
+ * @param internal the input DocType declaration as found in a StAX Reader.
+ * @return the formatted input.
+ */
+ private static String formatInternal(String internal) {
+ StringBuilder sb = new StringBuilder(internal.length());
+ char quote = ' ';
+ boolean white = true;
+ for (char ch : internal.toCharArray()) {
+ if (quote == ' ') {
+ // we are not in a quoted value...
+ if (isWhite(ch)) {
+ if (!white) {
+ // this will be the first whitespace.
+ // replace it with a single ' '
+ sb.append(' ');
+ white = true;
+ }
+ // subsequent (unquoted) whitespace is ignored
+ } else {
+ if (ch == '\'' || ch == '"') {
+ // we are entering a quoted value.
+ quote = ch;
+ } else if (ch == '<') {
+ // we are starting some form of declaration.
+ sb.append(" ");
+ }
+
+ if (ch == '>') {
+ // we are ending a declaration.
+ if (white) {
+ // the declaration ended with whitespace, which we
+ // remove.
+ sb.setCharAt(sb.length() - 1, ch);
+ } else {
+ // the declaration had no whitespace at the end. OK
+ sb.append(ch);
+ }
+ // all declarations end with a new-line.
+ sb.append('\n');
+ // and subsequent lines start as trimmed whitespace.
+ white = true;
+ } else {
+ sb.append(ch);
+ white = false;
+ }
+ }
+ } else {
+ // we are in a quoted value...
+ if (ch == quote) {
+ //we are leaving the quoted value.
+ quote = ' ';
+ }
+ sb.append(ch);
+ }
+ }
+ return sb.toString();
+ }
+
+ /**
+ * Parse out a DOCTYPE declaration as supplied by the standard StAX
+ * readers.
+ * <p>
+ * Using 'XML' terminology, this method assumes that the input is
+ * both 'well-formed' and 'valid'. The assumptions that this class makes
+ * ensure that the 'right thing' is done for valid content, but invalid
+ * content may or may not fail with a JDOMException. The behaviour of this
+ * method with invalid input is 'undefined'.
+ *
+ * @param input the input DOCTYPE string to parse. Must be valid.
+ * @param factory The JDOM factory to use to build the JDOM DocType.
+ * @return The input string as a DocType.
+ * @throws JDOMException if the DocType is not generated.
+ */
+ public static DocType parse(final String input, final JDOMFactory factory)
+         throws JDOMException {
+
+     // The whole input has to match the DOCTYPE pattern.
+     final Matcher mat = pattern.matcher(input);
+     if (!mat.matches()) {
+         throw new JDOMException("Doctype input does not appear to be valid: " + input);
+     }
+
+     // Pull out the four components. Which capture group holds an id
+     // depends on SYSTEM vs PUBLIC and on the quote style that was used.
+     final String docemt = mat.group(1);
+     final String sysid = getGroup(mat, 7, 9, 19, 21);
+     final String pubid = getGroup(mat, 13, 15);
+     final String internal = getGroup(mat, 23);
+
+     // Pick the factory method that fits the ids that are present.
+     final DocType dt;
+     if (pubid == null && sysid == null) {
+         dt = factory.docType(docemt);
+     } else if (pubid == null) {
+         dt = factory.docType(docemt, sysid);
+     } else {
+         dt = factory.docType(docemt, pubid, sysid);
+     }
+
+     // The internal subset is optional; it is re-formatted before it is set.
+     if (internal != null) {
+         dt.setInternalSubset(formatInternal(internal));
+     }
+     return dt;
+ }
+
+ /**
+ * Make instances 'impossible'. Everything is static.
+ */
+ private DTDParser() {
+ // Intentionally empty: every member of this class is static, and keeping
+ // the only constructor private prevents instances from being created.
+ }
+
+}
View
439 core/src/java/org/jdom2/input/StAXBuilder.java
@@ -0,0 +1,439 @@
+/*--
+
+ Copyright (C) 2000-2011 Jason Hunter & Brett McLaughlin.
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ 1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions, and the disclaimer that follows
+ these conditions in the documentation and/or other materials
+ provided with the distribution.
+
+ 3. The name "JDOM" must not be used to endorse or promote products
+ derived from this software without prior written permission. For
+ written permission, please contact <request_AT_jdom_DOT_org>.
+
+ 4. Products derived from this software may not be called "JDOM", nor
+ may "JDOM" appear in their name, without prior written permission
+ from the JDOM Project Management <request_AT_jdom_DOT_org>.
+
+ In addition, we request (but do not require) that you include in the
+ end-user documentation provided with the redistribution and/or in the
+ software itself an acknowledgement equivalent to the following:
+ "This product includes software developed by the
+ JDOM Project (http://www.jdom.org/)."
+ Alternatively, the acknowledgment may be graphical using the logos
+ available at http://www.jdom.org/images/logos.
+
+ THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED. IN NO EVENT SHALL THE JDOM AUTHORS OR THE PROJECT
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ SUCH DAMAGE.
+
+ This software consists of voluntary contributions made by many
+ individuals on behalf of the JDOM Project and was originally
+ created by Jason Hunter <jhunter_AT_jdom_DOT_org> and
+ Brett McLaughlin <brett_AT_jdom_DOT_org>. For more information
+ on the JDOM Project, please see <http://www.jdom.org/>.
+
+ */
+
+package org.jdom2.input;
+
+import java.util.Iterator;
+
+import javax.xml.namespace.QName;
+import javax.xml.stream.XMLEventReader;
+import javax.xml.stream.XMLStreamConstants;
+import javax.xml.stream.XMLStreamException;
+import javax.xml.stream.XMLStreamReader;
+import javax.xml.stream.events.Characters;
+import javax.xml.stream.events.StartElement;
+import javax.xml.stream.events.XMLEvent;
+
+import org.jdom2.AttributeType;
+import org.jdom2.Comment;
+import org.jdom2.DefaultJDOMFactory;
+import org.jdom2.DocType;
+import org.jdom2.Document;
+import org.jdom2.Element;
+import org.jdom2.JDOMException;
+import org.jdom2.JDOMFactory;
+import org.jdom2.Namespace;
+import org.jdom2.ProcessingInstruction;
+
+/**
+ * Builds a JDOM document from a StAX-based XMLStreamReader.
+ * <p>
+ * XMLStreamReaders are pre-configured and as a result JDOM is not able to
+ * alter whether the input is validated, or whether the Stream has escaped
+ * entities or not. These (and other) characteristics are configurable by
+ * setting the correct features and properties on the XMLInputFactory when it
+ * is used to create the XMLStreamReader.
+ * <p>
+ * Useful configuration to set, or know about is:
+ * <ul>
+ * <li>StAX streams seldom differentiate between Text and CDATA content. You
+ * will likely want to configure your StAX factory (XMLInputFactory) with
+ * <code>http://java.sun.com/xml/stream/properties/report-cdata-event</code>
+ * for the default Java StAX implementation, or the equivalent property for your
+ * StAX engine.
+ * <li>The remaining XMLInputFactory settings are likely to work fine at their
+ * default values.
+ * <li>StAX is not likely to be your best option if you want a validating
+ * parser, at least not with the default (built-in Java implementation in Java6
+ * which does not support it). Consider a SAX parser.
+ * </ul>
+ * <p>
+ * From a JDOM perspective XMLStreamReaders are more efficient than
+ * XMLEventReaders. Where possible use an XMLStreamReader.
+ * <p>
+ * If you happen to be looking at the source code, pay careful attention to the
+ * imports so you know what type of instance is being processed, whether it is
+ * a StAX class, or a JDOM class, because there are name conflicts.
+ *
+ * @author Rolf Lear
+ *
+ */
+public class StAXBuilder implements XMLStreamConstants {
+
+ /**
+ * Create a Document from an XMLStreamReader
+ * @param factory The {@link JDOMFactory} to use
+ * @param stream The XMLStreamReader to read from
+ * @return the parsed Document
+ * @throws JDOMException if there is any issue
+ * (XMLStreamExceptions are wrapped).
+ */
+ private static final Document process(final JDOMFactory factory,
+         final XMLStreamReader stream) throws JDOMException {
+     try {
+
+         final Document document = factory.document(null);
+
+         // the element currently being populated; null while outside the root.
+         Element current = null;
+
+         int state = stream.getEventType();
+
+         if (state != XMLStreamConstants.START_DOCUMENT) {
+             throw new JDOMException("JDOM requires that XMLStreamReaders " +
+                     "are at their beginning when being processed.");
+         }
+
+         while (XMLStreamConstants.END_DOCUMENT != state) {
+             switch (state) {
+
+                 case START_DOCUMENT: {
+                     // the <?xml version="..." standalone=".."?> declaration.
+                     document.setBaseURI(stream.getLocation().getSystemId());
+                     document.setProperty("ENCODING_SCHEME",
+                             stream.getCharacterEncodingScheme());
+                     document.setProperty("STANDALONE",
+                             String.valueOf(stream.isStandalone()));
+                     document.setProperty("ENCODING",
+                             stream.getEncoding());
+                     break;
+                 }
+
+                 case DTD: {
+                     // the DOCTYPE arrives as one string; DTDParser splits it.
+                     document.setDocType(
+                             DTDParser.parse(stream.getText(), factory));
+                     break;
+                 }
+
+                 case START_ELEMENT: {
+                     final Element emt = processElement(factory, stream);
+                     if (current != null) {
+                         current.addContent(emt);
+                     } else {
+                         // first element seen: it is the root, and any
+                         // DocType is given the root element's name.
+                         document.setRootElement(emt);
+                         final DocType dt = document.getDocType();
+                         if (dt != null) {
+                             dt.setElementName(emt.getName());
+                         }
+                     }
+                     current = emt;
+                     break;
+                 }
+
+                 case END_ELEMENT: {
+                     current = current.getParentElement();
+                     break;
+                 }
+
+                 case CDATA: {
+                     // character data outside the root element is dropped.
+                     if (current != null) {
+                         current.addContent(factory.cdata(stream.getText()));
+                     }
+                     break;
+                 }
+
+                 case SPACE:
+                 case CHARACTERS: {
+                     // text outside the root element is dropped.
+                     if (current != null) {
+                         current.addContent(factory.text(stream.getText()));
+                     }
+                     break;
+                 }
+
+                 case COMMENT: {
+                     // comments are kept at document level as well.
+                     final Comment comment = factory.comment(stream.getText());
+                     if (current != null) {
+                         current.addContent(comment);
+                     } else {
+                         document.addContent(comment);
+                     }
+                     break;
+                 }
+
+                 case ENTITY_REFERENCE: {
+                     if (current != null) {
+                         current.addContent(
+                                 factory.entityRef(stream.getLocalName()));
+                     }
+                     break;
+                 }
+
+                 case PROCESSING_INSTRUCTION: {
+                     // PIs are kept at document level as well.
+                     final ProcessingInstruction pi =
+                             factory.processingInstruction(
+                                     stream.getPITarget(), stream.getPIData());
+                     if (current != null) {
+                         current.addContent(pi);
+                     } else {
+                         document.addContent(pi);
+                     }
+                     break;
+                 }
+
+                 default:
+                     throw new JDOMException("Unexpected XMLStream event " + state);
+
+             }
+
+             // running dry before END_DOCUMENT is an error.
+             if (!stream.hasNext()) {
+                 throw new JDOMException("Unexpected end-of-XMLStreamReader");
+             }
+             state = stream.next();
+         }
+         return document;
+     } catch (final XMLStreamException xse) {
+         throw new JDOMException("Unable to process XMLStream. See Cause.", xse);
+     }
+ }
+
+ private static final Element processElement(final JDOMFactory factory,
+         final XMLStreamReader reader) {
+
+     // The element itself, in the namespace the reader reports for it.
+     final Namespace elementNs = Namespace.getNamespace(
+             reader.getPrefix(), reader.getNamespaceURI());
+     final Element element = factory.element(reader.getLocalName(), elementNs);
+
+     // Attributes: name, value, DTD type and namespace, by index.
+     final int attCount = reader.getAttributeCount();
+     for (int a = 0; a < attCount; a++) {
+         final String attName = reader.getAttributeLocalName(a);
+         final String attValue = reader.getAttributeValue(a);
+         final AttributeType attType =
+                 AttributeType.getAttributeType(reader.getAttributeType(a));
+         final Namespace attNs = Namespace.getNamespace(
+                 reader.getAttributePrefix(a), reader.getAttributeNamespace(a));
+         factory.setAttribute(element,
+                 factory.attribute(attName, attValue, attType, attNs));
+     }
+
+     // Namespace declarations made on this element.
+     final int nsCount = reader.getNamespaceCount();
+     for (int n = 0; n < nsCount; n++) {
+         final Namespace declared = Namespace.getNamespace(
+                 reader.getNamespacePrefix(n), reader.getNamespaceURI(n));
+         element.addNamespaceDeclaration(declared);
+     }
+
+     return element;
+ }
+
+
+ /**
+ * Create a Document from an XMLEventReader
+ * @param factory the {@link JDOMFactory} to use
+ * @param events the XMLEventReader to read from
+ * @return the parsed Document
+ * @throws JDOMException if there is any issue
+ * (XMLStreamExceptions are wrapped).
+ */
+ private static final Document process(final JDOMFactory factory,
+ final XMLEventReader events) throws JDOMException {
+ try {
+
+ final Document document = factory.document(null);
+ Element current = null;
+
+ XMLEvent event = events.peek();
+
+ if (XMLStreamConstants.START_DOCUMENT != event.getEventType()) {
+ throw new JDOMException("JDOM requires that XMLStreamReaders " +
+ "are at their beginning when being processed.");
+ }
+
+
+
+ while (event.getEventType() != XMLStreamConstants.END_DOCUMENT) {
+ if (event.isStartDocument()) {
+ document.setBaseURI(event.getLocation().getSystemId());
+ document.setProperty("ENCODING_SCHEME",
+ ((javax.xml.stream.events.StartDocument)event).getCharacterEncodingScheme());
+ document.setProperty("STANDALONE", String.valueOf(
+ ((javax.xml.stream.events.StartDocument)event).isStandalone()));
+ // document.setProperty("ENCODING",
+ // ((StartDocument)event).getEncoding());
+ } else if (event instanceof javax.xml.stream.events.DTD) {
+ //List<?> list = (List<?>)reader.getProperty("javax.xml.stream.entities");
+ //System.out.println(list);
+ final DocType dtype = DTDParser.parse(((javax.xml.stream.events.DTD)event).getDocumentTypeDeclaration(), factory);
+ document.setDocType(dtype);
+ } else if (event.isStartElement()) {
+ final Element emt = processElement(factory, event.asStartElement());
+ if (current == null) {
+ document.setRootElement(emt);
+ final DocType dt = document.getDocType();
+ if (dt != null) {
+ dt.setElementName(emt.getName());
+ }
+ } else {
+ current.addContent(emt);
+ }
+ current = emt;
+ } else if (event.isCharacters()) {
+ final Characters chars = event.asCharacters();
+ if (chars.isCData()) {
+ current.addContent(factory.cdata(
+ ((Characters)event).getData()));
+ } else {
+ current.addContent(factory.text(
+ ((Characters)event).getData()));
+ }
+ } else if (event instanceof javax.xml.stream.events.Comment) {
+ final Comment comment = factory.comment(
+ ((javax.xml.stream.events.Comment)event).getText());
+ if (current == null) {
+ document.addContent(comment);
+ } else {
+ current.addContent(comment);
+ }
+ } else if (event.isEntityReference()) {
+ current.addContent(factory.entityRef(
+ ((javax.xml.stream.events.EntityReference)event).getName()));
+ } else if (event.isProcessingInstruction()) {
+ final ProcessingInstruction pi = factory.processingInstruction(
+ ((javax.xml.stream.events.ProcessingInstruction)event).getTarget(),
+ ((javax.xml.stream.events.ProcessingInstruction)event).getData());
+ if (current == null) {
+ document.addContent(pi);
+ } else {
+ current.addContent(pi);
+ }
+ } else if (event.isEndElement()) {
+ current = current.getParentElement();
+ }
+ if (events.hasNext()) {
+ event = events.nextEvent();
+ } else {
+ break;
+ }
+ }
+ return document;
+ } catch (final XMLStreamException xse) {
+ throw new JDOMException("Unable to process XMLStream. See Cause.", xse);
+ }
+ }
+
+ private static final Element processElement(final JDOMFactory factory,
+         final StartElement event) {
+
+     // The element itself, named and namespaced from the event's QName.
+     final QName elementName = event.getName();
+     final Namespace elementNs = Namespace.getNamespace(
+             elementName.getPrefix(), elementName.getNamespaceURI());
+     final Element element = factory.element(
+             elementName.getLocalPart(), elementNs);
+
+     // Attributes. The StAX iterator is untyped, hence the cast.
+     final Iterator<?> attributes = event.getAttributes();
+     while (attributes.hasNext()) {
+         final javax.xml.stream.events.Attribute att =
+                 (javax.xml.stream.events.Attribute)attributes.next();
+
+         final QName aqname = att.getName();
+         final Namespace attNs = Namespace.getNamespace(
+                 aqname.getPrefix(), aqname.getNamespaceURI());
+         final AttributeType attType =
+                 AttributeType.getAttributeType(att.getDTDType());
+
+         factory.setAttribute(element, factory.attribute(
+                 aqname.getLocalPart(), att.getValue(), attType, attNs));
+     }
+
+     // Namespace declarations made on this element.
+     final Iterator<?> declarations = event.getNamespaces();
+     while (declarations.hasNext()) {
+         final javax.xml.stream.events.Namespace ns =
+                 (javax.xml.stream.events.Namespace)declarations.next();
+         element.addNamespaceDeclaration(Namespace.getNamespace(
+                 ns.getPrefix(), ns.getNamespaceURI()));
+     }
+
+     return element;
+ }
+
+
+
+ /** The factory to use for parsing */
+ private JDOMFactory factory = new DefaultJDOMFactory();
+
+ /**
+ * Returns the current {@link org.jdom2.JDOMFactory} in use.
+ * @return the factory in use
+ */
+ public JDOMFactory getFactory() {
+     // the factory handed to both process(...) variants by build(...).
+     return this.factory;
+ }
+
+ /**
+ * This sets a custom JDOMFactory for the builder. Use this to build
+ * the tree with your own subclasses of the JDOM classes.
+ *
+ * @param factory <code>JDOMFactory</code> to use
+ */
+ public void setFactory(JDOMFactory factory) {
+     // stored as given; used by subsequent build(...) calls.
+     final JDOMFactory replacement = factory;
+     this.factory = replacement;
+ }
+
+ /**
+ * This builds a document from the supplied
+ * XMLStreamReader.
+ * <p>
+ * The JDOMContent will be built by the current JDOMFactory.
+ *
+ * @param reader <code>XMLStreamReader</code> to read from
+ * @return <code>Document</code> resultant Document object
+ * @throws JDOMException when errors occur in parsing
+ */
+ public Document build(XMLStreamReader reader) throws JDOMException {
+     // delegate to the static worker, using the currently configured factory.
+     final Document built = process(this.factory, reader);
+     return built;
+ }
+
+ /**
+ * This builds a document from the supplied
+ * XMLEventReader.
+ * <p>
+ * The JDOMContent will be built by the current JDOMFactory.
+ *
+ * @param events <code>XMLEventReader</code> to read from
+ * @return <code>Document</code> resultant Document object
+ * @throws JDOMException when errors occur in parsing
+ */
+ public Document build(XMLEventReader events) throws JDOMException {
+     // delegate to the static worker, using the currently configured factory.
+     final Document built = process(this.factory, events);
+     return built;
+ }
+
+}
View
10 test/resources/DOMBuilder/complex.xml
@@ -1,6 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- root comment -->
+
<?jdomtest root level ?>
+
<root att1="val1" att2="val2" >
text
<child att="child1" xml:space="preserve"> hello Frodo Baggins! </child>
@@ -10,4 +12,10 @@
<!-- comment -->
<child att="child4" unresolved="&amp;"/>
<child att="child5" > <![CDATA[some cdata text ]]> </child>
-</root>
+ <child att="child6" >
+ <leaf att="Leaf6" />
+ </child>
+</root>
+
+
+
View
336 test/src/java/org/jdom2/test/cases/input/TestDTDParser.java
@@ -0,0 +1,336 @@
+package org.jdom2.test.cases.input;
+
+import static org.junit.Assert.*;
+
+import org.jdom2.DocType;
+import org.jdom2.JDOMException;
+import org.jdom2.JDOMFactory;
+import org.jdom2.DefaultJDOMFactory;
+import org.jdom2.input.DTDParser;
+import org.jdom2.test.util.UnitTestUtil;
+import org.junit.Test;
+
+@SuppressWarnings("javadoc")
+public class TestDTDParser {
+
+    private static final JDOMFactory factory = new DefaultJDOMFactory();
+
+    /**
+     * Parses <code>dtd</code> with {@link DTDParser} and checks all four
+     * properties of the resulting DocType. Every document in this test
+     * declares the element name "root", so that expectation is fixed here.
+     *
+     * @param dtd            the DOCTYPE declaration text to parse
+     * @param publicId       expected public ID, or null when none is expected
+     * @param systemId       expected system ID, or null when none is expected
+     * @param internalSubset expected internal subset, or null when none is expected
+     * @throws JDOMException if the parser rejects the input
+     */
+    private static void assertRootDocType(String dtd, String publicId,
+            String systemId, String internalSubset) throws JDOMException {
+        DocType dt = DTDParser.parse(dtd, factory);
+
+        assertEquals("root", dt.getElementName());
+        assertEquals(publicId, dt.getPublicID());
+        assertEquals(systemId, dt.getSystemID());
+        assertEquals(internalSubset, dt.getInternalSubset());
+    }
+
+    @Test
+    public void testParseSimple() throws JDOMException {
+        assertRootDocType("<!DOCTYPE root >", null, null, null);
+    }
+
+    @Test
+    public void testParseSimpleCompact() throws JDOMException {
+        assertRootDocType("<!DOCTYPE root>", null, null, null);
+    }
+
+    @Test
+    public void testParseSimpleCompactInternal() throws JDOMException {
+        assertRootDocType("<!DOCTYPE root[internal]>", null, null, "internal");
+    }
+
+    @Test
+    public void testParseSYSTEMquotNONE() throws JDOMException {
+        assertRootDocType("<!DOCTYPE root SYSTEM \"system\" >",
+                null, "system", null);
+    }
+
+    @Test
+    public void testParseSYSTEMaposNONE() throws JDOMException {
+        assertRootDocType("<!DOCTYPE root SYSTEM 'system' >",
+                null, "system", null);
+    }
+
+    @Test
+    public void testParseSYSTEMquotSimple() throws JDOMException {
+        assertRootDocType("<!DOCTYPE root SYSTEM \"system\" [internal] >",
+                null, "system", "internal");
+    }
+
+    @Test
+    public void testParseSYSTEMaposSimple() throws JDOMException {
+        assertRootDocType("<!DOCTYPE root SYSTEM 'system' [internal] >",
+                null, "system", "internal");
+    }
+
+    @Test
+    public void testParsePUBLICquotenullNONE() throws JDOMException {
+        assertRootDocType("<!DOCTYPE root PUBLIC \"public\" >",
+                "public", null, null);
+    }
+
+    @Test
+    public void testParsePUBLICaposnullNONE() throws JDOMException {
+        assertRootDocType("<!DOCTYPE root PUBLIC 'public' >",
+                "public", null, null);
+    }
+
+    @Test
+    public void testParsePUBLICquotquotNONE() throws JDOMException {
+        assertRootDocType("<!DOCTYPE root PUBLIC \"public\" \"system\" >",
+                "public", "system", null);
+    }
+
+    @Test
+    public void testParsePUBLICquotaposNONE() throws JDOMException {
+        assertRootDocType("<!DOCTYPE root PUBLIC \"public\" 'system' >",
+                "public", "system", null);
+    }
+
+    @Test
+    public void testParsePUBLICaposquotNONE() throws JDOMException {
+        assertRootDocType("<!DOCTYPE root PUBLIC 'public' \"system\" >",
+                "public", "system", null);
+    }
+
+    @Test
+    public void testParsePUBLICaposaposNONE() throws JDOMException {
+        assertRootDocType("<!DOCTYPE root PUBLIC 'public' 'system' >",
+                "public", "system", null);
+    }
+
+    @Test
+    public void testParsePUBLICaposaposSimple() throws JDOMException {
+        assertRootDocType("<!DOCTYPE root PUBLIC 'public' 'system' [internal] >",
+                "public", "system", "internal");
+    }
+
+    @Test
+    public void testParsePUBLICaposaposSimpleCompact() throws JDOMException {
+        assertRootDocType("<!DOCTYPE root PUBLIC 'public' 'system'[internal]>",
+                "public", "system", "internal");
+    }
+
+    @Test
+    public void testParsePUBLICaposaposSimpleSpacy() throws JDOMException {
+        assertRootDocType(
+                " <!DOCTYPE root PUBLIC ' public ' ' system ' [ <!ENTITY " +
+                " ent\n EntityDef > ] > ",
+                " public ", " system ", " <!ENTITY ent EntityDef>\n");
+    }
+
+    @Test
+    public void testParseInternalA() throws JDOMException {
+        assertRootDocType(
+                "<!DOCTYPE root [<!ELEMENT root (#PCDATA)><!ENTITY xpd 'Expand Me!' >]>",
+                null, null,
+                " <!ELEMENT root (#PCDATA)>\n <!ENTITY xpd 'Expand Me!'>\n");
+    }
+
+    @Test
+    public void testParseInternalEmbeddedNewlines() throws JDOMException {
+        assertRootDocType(
+                "<!DOCTYPE root \t \r \n [ \r \n <!ELEMENT root\n (#PCDATA)> \n <!ENTITY xpd \n 'Expand Me!' >\n ] \n >",
+                null, null,
+                " <!ELEMENT root (#PCDATA)>\n <!ENTITY xpd 'Expand Me!'>\n");
+    }
+
+    @Test
+    public void testParseIncomplete() {
+        // An unterminated DOCTYPE must be rejected with a JDOMException.
+        try {
+            DTDParser.parse("<!DOCTYPE root",factory);
+            UnitTestUtil.failNoException(JDOMException.class);
+        } catch (Exception e) {
+            UnitTestUtil.checkException(JDOMException.class, e);
+        }
+
+    }
+
+    @Test
+    public void testParseSpace() throws JDOMException {
+        assertRootDocType("<!DOCTYPE root>", null, null, null);
+    }
+
+    @Test
+    public void testParseTab() throws JDOMException {
+        assertRootDocType("<!DOCTYPE\troot>", null, null, null);
+    }
+
+    @Test
+    public void testParseNewline() throws JDOMException {
+        assertRootDocType("<!DOCTYPE\nroot>", null, null, null);
+    }
+
+    @Test
+    public void testParseCarriageReturn() throws JDOMException {
+        assertRootDocType("<!DOCTYPE\rroot>", null, null, null);
+    }
+
+    @Test
+    public void testParseInternalSpace() throws JDOMException {
+        assertRootDocType("<!DOCTYPE root [ <!ENTITY ent 'entity' > ] >",
+                null, null, " <!ENTITY ent 'entity'>\n");
+    }
+
+    @Test
+    public void testParseInternalTab() throws JDOMException {
+        assertRootDocType("<!DOCTYPE root [\t<!ENTITY\tent\t'entity'\t>\t]\t>",
+                null, null, " <!ENTITY ent 'entity'>\n");
+    }
+
+    @Test
+    public void testParseInternalNewline() throws JDOMException {
+        assertRootDocType("<!DOCTYPE root [\n<!ENTITY\nent\n'entity'\n>\n]\n>",
+                null, null, " <!ENTITY ent 'entity'>\n");
+    }
+
+    @Test
+    public void testParseInternalCarriageReturn() throws JDOMException {
+        assertRootDocType("<!DOCTYPE root [\r<!ENTITY\rent\r'entity'\r>\r]\r>",
+                null, null, " <!ENTITY ent 'entity'>\n");
+    }
+
+    @Test
+    public void testParseInternalWithAPosSpace() throws JDOMException {
+        // Whitespace inside a quoted entity value must survive untouched.
+        assertRootDocType(
+                "<!DOCTYPE root [<!ENTITY ent 'entity with spaces\nand newlines,\ttabs, and crs\r' >]>",
+                null, null,
+                " <!ENTITY ent 'entity with spaces\nand newlines,\ttabs, and crs\r'>\n");
+    }
+
+    @Test
+    public void testParseInternalWithQuoteSpace() throws JDOMException {
+        // Same as the apostrophe variant, but with a double-quoted value.
+        assertRootDocType(
+                "<!DOCTYPE root [<!ENTITY ent \"entity with spaces\nand newlines,\ttabs, and crs\r\" >]>",
+                null, null,
+                " <!ENTITY ent \"entity with spaces\nand newlines,\ttabs, and crs\r\">\n");
+    }
+
+}
View
191 test/src/java/org/jdom2/test/cases/input/TestStAXBuilder.java
@@ -0,0 +1,191 @@
+package org.jdom2.test.cases.input;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.io.CharArrayWriter;
+import java.io.File;
+import java.io.IOException;
+
+import javax.xml.stream.XMLEventReader;
+import javax.xml.stream.XMLInputFactory;
+import javax.xml.stream.XMLStreamReader;
+import javax.xml.transform.stream.StreamSource;
+
+import org.jdom2.*;
+import org.jdom2.input.SAXBuilder;
+import org.jdom2.input.StAXBuilder;
+import org.jdom2.output.Format;
+import org.jdom2.output.XMLOutputter;
+import org.jdom2.test.util.UnitTestUtil;
+import org.junit.Ignore;
+import org.junit.Test;
+
+@SuppressWarnings("javadoc")
+public class TestStAXBuilder {
+
+    @Test
+    public void testStAXBuilder() {
+        StAXBuilder db = new StAXBuilder();
+        assertNotNull(db);
+    }
+
+    @Test
+    public void testFactory() {
+        StAXBuilder db = new StAXBuilder();
+        assertTrue(db.getFactory() instanceof DefaultJDOMFactory);
+        DefaultJDOMFactory fac = new DefaultJDOMFactory();
+        assertFalse(db.getFactory() == fac);
+        db.setFactory(fac);
+        assertTrue(db.getFactory() == fac);
+    }
+
+    @Test
+    public void testSimpleDocumentExpand() {
+        checkStAX("test/resources/DOMBuilder/simple.xml", true);
+    }
+
+    @Test
+    public void testAttributesDocumentExpand() {
+        checkStAX("test/resources/DOMBuilder/attributes.xml", true);
+    }
+
+    @Test
+    public void testNamespaceDocumentExpand() {
+        checkStAX("test/resources/DOMBuilder/namespaces.xml", true);
+    }
+
+    @Test
+    @Ignore
+    public void testDocTypeDocumentExpand() {
+        checkStAX("test/resources/DOMBuilder/doctype.xml", true);
+    }
+
+    @Test
+    public void testComplexDocumentExpand() {
+        checkStAX("test/resources/DOMBuilder/complex.xml", true);
+    }
+
+    @Test
+    public void testXSDDocumentExpand() {
+        checkStAX("test/resources/xsdcomplex/input.xml", true);
+    }
+
+    @Test
+    public void testSimpleDocument() {
+        checkStAX("test/resources/DOMBuilder/simple.xml", false);
+    }
+
+    @Test
+    public void testAttributesDocument() {
+        checkStAX("test/resources/DOMBuilder/attributes.xml", false);
+    }
+
+    @Test
+    public void testNamespaceDocument() {
+        checkStAX("test/resources/DOMBuilder/namespaces.xml", false);
+    }
+
+    @Test
+    public void testDocTypeDocument() {
+        checkStAX("test/resources/DOMBuilder/doctype.xml", false);
+    }
+
+    @Test
+    public void testComplexDocument() {
+        checkStAX("test/resources/DOMBuilder/complex.xml", false);
+    }
+
+    @Test
+    public void testXSDDocument() {
+        checkStAX("test/resources/xsdcomplex/input.xml", false);
+    }
+
+    /**
+     * Builds the given file three ways (StAX stream reader, StAX event
+     * reader, and SAX) and asserts that the pretty-printed documents and
+     * root elements are pairwise identical.
+     *
+     * @param filename path of the XML file to parse
+     * @param expand   whether entity references should be expanded
+     */
+    private void checkStAX(String filename, boolean expand) {
+        try {
+            StAXBuilder stxb = new StAXBuilder();
+            StreamSource source = new StreamSource(new File(filename));
+            XMLInputFactory inputfac = XMLInputFactory.newInstance();
+            inputfac.setProperty(
+                    XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES, Boolean.valueOf(expand));
+            inputfac.setProperty("http://java.sun.com/xml/stream/properties/report-cdata-event", Boolean.TRUE);
+            XMLStreamReader reader = inputfac.createXMLStreamReader(source);
+            Document staxbuild = stxb.build(reader);
+            Element staxroot = staxbuild.hasRootElement() ? staxbuild.getRootElement() : null;
+
+            StreamSource eventsource = new StreamSource(new File(filename));
+            XMLEventReader events = inputfac.createXMLEventReader(eventsource);
+            Document eventbuild = stxb.build(events);
+            // Guard on the event-built document itself; the original code
+            // mistakenly consulted the stream-built document here.
+            Element eventroot = eventbuild.hasRootElement() ? eventbuild.getRootElement() : null;
+
+            SAXBuilder sb = new SAXBuilder(false);
+            sb.setExpandEntities(expand);
+            sb.setFeature("http://xml.org/sax/features/namespaces", true);
+            sb.setFeature("http://xml.org/sax/features/namespace-prefixes", true);
+
+            Document saxbuild = sb.build(filename);
+            Element saxroot = saxbuild.hasRootElement() ? saxbuild.getRootElement() : null;
+
+            assertEquals("DOC SAX to StAXReader", toString(saxbuild), toString(staxbuild));
+            assertEquals("DOC SAX to StAXEvent", toString(saxbuild), toString(eventbuild));
+            assertEquals("DOC StAXReader to StAXEvent", toString(staxbuild), toString(eventbuild));
+            assertEquals("ROOT SAX to StAXReader", toString(saxroot), toString(staxroot));
+            assertEquals("ROOT SAX to StAXEvent", toString(saxroot), toString(eventroot));
+            assertEquals("ROOT StAXReader to StAXEvent", toString(staxroot), toString(eventroot));
+
+        } catch (Exception e) {
+            e.printStackTrace();
+            fail("Could not parse file '" + filename + "': " + e.getMessage());
+        }
+    }
+
+    /**
+     * Rewrites the internal subset of a DocType into a canonical form so
+     * that documents produced by different parsers can be compared as text.
+     * Does nothing when there is no DocType or no internal subset.
+     *
+     * @param dt the DocType to normalize in place, may be null
+     */
+    private void normalizeDTD(DocType dt) {
+        if (dt == null) {
+            return;
+        }
+        String internalss = dt.getInternalSubset();
+        if (internalss == null) {
+            // A DOCTYPE without an internal subset has nothing to normalize;
+            // calling trim() on it would throw a NullPointerException.
+            return;
+        }
+        // do some tricks so that we can compare the results.
+        // these may well break the actual syntax of DTD's but for testing
+        // purposes it is OK.
+        internalss = internalss.trim();
+        // the spacing in and around the internal subset may differ between
+        // the SAX parse and the StAX parses.
+        // make all whitespace a single space.
+        internalss = internalss.replaceAll("\\s+", " ");
+        // the parsers may also disagree on the quote character used around
+        // entity values.
+        // simply replace all " with ' and be done with it.
+        internalss = internalss.replaceAll("\"", "'");
+        dt.setInternalSubset("\n" + internalss + "\n");
+    }
+
+    /**
+     * Normalizes and pretty-prints a whole document.
+     *
+     * @param doc the document to serialize
+     * @return the pretty-printed text, or null if output failed
+     */
+    private String toString(Document doc) {
+        UnitTestUtil.normalizeAttributes(doc.getRootElement());
+        normalizeDTD(doc.getDocType());
+        CharArrayWriter caw = new CharArrayWriter();
+        try {
+            prettyOutputter().output(doc, caw);
+        } catch (IOException e) {
+            e.printStackTrace();
+            return null;
+        }
+        return caw.toString();
+    }
+
+    /**
+     * Normalizes and pretty-prints a single element.
+     *
+     * @param emt the element to serialize
+     * @return the pretty-printed text, or null if output failed
+     */
+    private String toString(Element emt) {
+        UnitTestUtil.normalizeAttributes(emt);
+        CharArrayWriter caw = new CharArrayWriter();
+        try {
+            prettyOutputter().output(emt, caw);
+        } catch (IOException e) {
+            e.printStackTrace();
+            return null;
+        }
+        return caw.toString();
+    }
+
+    /** Creates the outputter shared by both toString variants. */
+    private static XMLOutputter prettyOutputter() {
+        return new XMLOutputter(Format.getPrettyFormat());
+    }
+
+}

0 comments on commit 59ee0d5

Please sign in to comment.