diff --git a/CHANGES b/CHANGES index 79389eeefd..40ad8856b7 100644 --- a/CHANGES +++ b/CHANGES @@ -4,6 +4,9 @@ jsoup changelog * Improved startup time, particularly on Android, by reducing garbage generation and CPU execution time when loading the HTML entity files. About 1.72x faster in this area. + * Bugfix - a "SYSTEM" flag in doctype tags would be incorrectly removed. + + *** Release 1.10.1 [2016-Oct-23] * New feature: added the option to preserve case for tags and/or attributes, with ParseSettings. By default, the HTML parser will continue to normalize tag names and attribute names to lower case, and the XML parser will now preserve diff --git a/src/main/java/org/jsoup/nodes/DocumentType.java b/src/main/java/org/jsoup/nodes/DocumentType.java index 838ec6f720..4e7730b128 100644 --- a/src/main/java/org/jsoup/nodes/DocumentType.java +++ b/src/main/java/org/jsoup/nodes/DocumentType.java @@ -9,7 +9,10 @@ * A {@code } node. */ public class DocumentType extends Node { + public static final String PUBLIC_KEY = "PUBLIC"; + public static final String SYSTEM_KEY = "SYSTEM"; private static final String NAME = "name"; + private static final String PUB_SYS_KEY = "pubSysKey"; // PUBLIC or SYSTEM private static final String PUBLIC_ID = "publicId"; private static final String SYSTEM_ID = "systemId"; // todo: quirk mode from publicId and systemId @@ -26,6 +29,27 @@ public DocumentType(String name, String publicId, String systemId, String baseUr attr(NAME, name); attr(PUBLIC_ID, publicId); + if (has(PUBLIC_ID)) { + attr(PUB_SYS_KEY, PUBLIC_KEY); + } + attr(SYSTEM_ID, systemId); + } + + /** + * Create a new doctype element with an explicit {@code pubSysKey}: the keyword ({@link #PUBLIC_KEY} or {@link #SYSTEM_KEY}) written before the IDs, or null to leave it unset. 
+ * @param name the doctype's name + * @param publicId the doctype's public ID + * @param systemId the doctype's system ID + * @param baseUri the doctype's base URI + */ + public DocumentType(String name, String pubSysKey, String publicId, String systemId, String baseUri) { + super(baseUri); + + attr(NAME, name); + if (pubSysKey != null) { + attr(PUB_SYS_KEY, pubSysKey); + } + attr(PUBLIC_ID, publicId); attr(SYSTEM_ID, systemId); } @@ -44,8 +68,10 @@ void outerHtmlHead(Appendable accum, int depth, Document.OutputSettings out) thr } if (has(NAME)) accum.append(" ").append(attr(NAME)); + if (has(PUB_SYS_KEY)) + accum.append(" ").append(attr(PUB_SYS_KEY)); if (has(PUBLIC_ID)) - accum.append(" PUBLIC \"").append(attr(PUBLIC_ID)).append('"'); + accum.append(" \"").append(attr(PUBLIC_ID)).append('"'); if (has(SYSTEM_ID)) accum.append(" \"").append(attr(SYSTEM_ID)).append('"'); accum.append('>'); diff --git a/src/main/java/org/jsoup/parser/HtmlTreeBuilderState.java b/src/main/java/org/jsoup/parser/HtmlTreeBuilderState.java index 9a4081405a..c515462c77 100644 --- a/src/main/java/org/jsoup/parser/HtmlTreeBuilderState.java +++ b/src/main/java/org/jsoup/parser/HtmlTreeBuilderState.java @@ -20,7 +20,7 @@ boolean process(Token t, HtmlTreeBuilder tb) { // todo: quirk state check on doctype ids Token.Doctype d = t.asDoctype(); DocumentType doctype = new DocumentType( - tb.settings.normalizeTag(d.getName()), d.getPublicIdentifier(), d.getSystemIdentifier(), tb.getBaseUri()); + tb.settings.normalizeTag(d.getName()), d.getPubSysKey(), d.getPublicIdentifier(), d.getSystemIdentifier(), tb.getBaseUri()); tb.getDocument().appendChild(doctype); if (d.isForceQuirks()) tb.getDocument().quirksMode(Document.QuirksMode.quirks); diff --git a/src/main/java/org/jsoup/parser/Token.java b/src/main/java/org/jsoup/parser/Token.java index 3b110c9125..34baf19699 100644 --- a/src/main/java/org/jsoup/parser/Token.java +++ b/src/main/java/org/jsoup/parser/Token.java @@ -32,6 +32,7 @@ static void 
reset(StringBuilder sb) { static final class Doctype extends Token { final StringBuilder name = new StringBuilder(); + String pubSysKey = null; final StringBuilder publicIdentifier = new StringBuilder(); final StringBuilder systemIdentifier = new StringBuilder(); boolean forceQuirks = false; @@ -43,6 +44,7 @@ static final class Doctype extends Token { @Override Token reset() { reset(name); + pubSysKey = null; reset(publicIdentifier); reset(systemIdentifier); forceQuirks = false; @@ -53,6 +55,10 @@ String getName() { return name.toString(); } + String getPubSysKey() { + return pubSysKey; + } + String getPublicIdentifier() { return publicIdentifier.toString(); } diff --git a/src/main/java/org/jsoup/parser/TokeniserState.java b/src/main/java/org/jsoup/parser/TokeniserState.java index 2e998d5219..6a97238d49 100644 --- a/src/main/java/org/jsoup/parser/TokeniserState.java +++ b/src/main/java/org/jsoup/parser/TokeniserState.java @@ -1,5 +1,7 @@ package org.jsoup.parser; +import org.jsoup.nodes.DocumentType; + import java.util.Arrays; /** @@ -1189,9 +1191,11 @@ void read(Tokeniser t, CharacterReader r) { else if (r.matches('>')) { t.emitDoctypePending(); t.advanceTransition(Data); - } else if (r.matchConsumeIgnoreCase("PUBLIC")) { + } else if (r.matchConsumeIgnoreCase(DocumentType.PUBLIC_KEY)) { + t.doctypePending.pubSysKey = DocumentType.PUBLIC_KEY; t.transition(AfterDoctypePublicKeyword); - } else if (r.matchConsumeIgnoreCase("SYSTEM")) { + } else if (r.matchConsumeIgnoreCase(DocumentType.SYSTEM_KEY)) { + t.doctypePending.pubSysKey = DocumentType.SYSTEM_KEY; t.transition(AfterDoctypeSystemKeyword); } else { t.error(this); diff --git a/src/main/java/org/jsoup/parser/XmlTreeBuilder.java b/src/main/java/org/jsoup/parser/XmlTreeBuilder.java index 2ac525fc59..cfcb1a34c6 100644 --- a/src/main/java/org/jsoup/parser/XmlTreeBuilder.java +++ b/src/main/java/org/jsoup/parser/XmlTreeBuilder.java @@ -97,7 +97,7 @@ void insert(Token.Character characterToken) { } void 
insert(Token.Doctype d) { - DocumentType doctypeNode = new DocumentType(settings.normalizeTag(d.getName()), d.getPublicIdentifier(), d.getSystemIdentifier(), baseUri); + DocumentType doctypeNode = new DocumentType(settings.normalizeTag(d.getName()), d.getPubSysKey(), d.getPublicIdentifier(), d.getSystemIdentifier(), baseUri); insertNode(doctypeNode); } diff --git a/src/test/java/org/jsoup/nodes/DocumentTypeTest.java b/src/test/java/org/jsoup/nodes/DocumentTypeTest.java index 12e6268ece..38110ff787 100644 --- a/src/test/java/org/jsoup/nodes/DocumentTypeTest.java +++ b/src/test/java/org/jsoup/nodes/DocumentTypeTest.java @@ -1,5 +1,7 @@ package org.jsoup.nodes; +import org.jsoup.Jsoup; +import org.jsoup.parser.Parser; import org.junit.Test; import static org.junit.Assert.*; @@ -38,4 +40,31 @@ public void constructorValidationOkWithBlankPublicAndSystemIds() { DocumentType combo = new DocumentType("notHtml", "--public", "--system", ""); assertEquals("", combo.outerHtml()); } + + @Test public void testRoundTrip() { + String base = ""; + assertEquals("", htmlOutput(base)); + assertEquals(base, xmlOutput(base)); + + String publicDoc = ""; + assertEquals(publicDoc, htmlOutput(publicDoc)); + assertEquals(publicDoc, xmlOutput(publicDoc)); + + String systemDoc = ""; + assertEquals(systemDoc, htmlOutput(systemDoc)); + assertEquals(systemDoc, xmlOutput(systemDoc)); + + String legacyDoc = ""; + assertEquals(legacyDoc, htmlOutput(legacyDoc)); + assertEquals(legacyDoc, xmlOutput(legacyDoc)); + } + + private String htmlOutput(String in) { + DocumentType type = (DocumentType) Jsoup.parse(in).childNode(0); + return type.outerHtml(); + } + + private String xmlOutput(String in) { + return Jsoup.parse(in, "", Parser.xmlParser()).childNode(0).outerHtml(); + } }