Skip to content

Commit

Permalink
Fixed handling of public/system flag in doctypes
Browse files Browse the repository at this point in the history
Fixes #408
  • Loading branch information
jhy committed Oct 25, 2016
1 parent fa929d4 commit c28e5bf
Show file tree
Hide file tree
Showing 7 changed files with 73 additions and 5 deletions.
3 changes: 3 additions & 0 deletions CHANGES
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@ jsoup changelog
* Improved startup time, particularly on Android, by reducing garbage generation and CPU execution time when loading
the HTML entity files. About 1.72x faster in this area.

* Bugfix - a "SYSTEM" flag in doctype tags would be incorrectly removed.
<https://github.com/jhy/jsoup/issues/408>

*** Release 1.10.1 [2016-Oct-23]
* New feature: added the option to preserve case for tags and/or attributes, with ParseSettings. By default, the HTML
parser will continue to normalize tag names and attribute names to lower case, and the XML parser will now preserve
Expand Down
28 changes: 27 additions & 1 deletion src/main/java/org/jsoup/nodes/DocumentType.java
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,10 @@
* A {@code <!DOCTYPE>} node.
*/
public class DocumentType extends Node {
public static final String PUBLIC_KEY = "PUBLIC";
public static final String SYSTEM_KEY = "SYSTEM";
private static final String NAME = "name";
private static final String PUB_SYS_KEY = "pubSysKey"; // PUBLIC or SYSTEM
private static final String PUBLIC_ID = "publicId";
private static final String SYSTEM_ID = "systemId";
// todo: quirk mode from publicId and systemId
Expand All @@ -26,6 +29,27 @@ public DocumentType(String name, String publicId, String systemId, String baseUr

attr(NAME, name);
attr(PUBLIC_ID, publicId);
if (has(PUBLIC_ID)) {
attr(PUB_SYS_KEY, PUBLIC_KEY);
}
attr(SYSTEM_ID, systemId);
}

/**
* Create a new doctype element.
* @param name the doctype's name
* @param pubSysKey the doctype's PUBLIC or SYSTEM keyword; may be null, in which case no keyword attribute is set
* @param publicId the doctype's public ID
* @param systemId the doctype's system ID
* @param baseUri the doctype's base URI
*/
public DocumentType(String name, String pubSysKey, String publicId, String systemId, String baseUri) {
super(baseUri);

attr(NAME, name);
// Unlike the name and ids, the keyword attribute is only written when the caller supplied one.
if (pubSysKey != null) {
attr(PUB_SYS_KEY, pubSysKey);
}
attr(PUBLIC_ID, publicId);
attr(SYSTEM_ID, systemId);
}

Expand All @@ -44,8 +68,10 @@ void outerHtmlHead(Appendable accum, int depth, Document.OutputSettings out) thr
}
if (has(NAME))
accum.append(" ").append(attr(NAME));
if (has(PUB_SYS_KEY))
accum.append(" ").append(attr(PUB_SYS_KEY));
if (has(PUBLIC_ID))
accum.append(" PUBLIC \"").append(attr(PUBLIC_ID)).append('"');
accum.append(" \"").append(attr(PUBLIC_ID)).append('"');
if (has(SYSTEM_ID))
accum.append(" \"").append(attr(SYSTEM_ID)).append('"');
accum.append('>');
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/org/jsoup/parser/HtmlTreeBuilderState.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ boolean process(Token t, HtmlTreeBuilder tb) {
// todo: quirk state check on doctype ids
Token.Doctype d = t.asDoctype();
DocumentType doctype = new DocumentType(
tb.settings.normalizeTag(d.getName()), d.getPublicIdentifier(), d.getSystemIdentifier(), tb.getBaseUri());
tb.settings.normalizeTag(d.getName()), d.getPubSysKey(), d.getPublicIdentifier(), d.getSystemIdentifier(), tb.getBaseUri());
tb.getDocument().appendChild(doctype);
if (d.isForceQuirks())
tb.getDocument().quirksMode(Document.QuirksMode.quirks);
Expand Down
6 changes: 6 additions & 0 deletions src/main/java/org/jsoup/parser/Token.java
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ static void reset(StringBuilder sb) {

static final class Doctype extends Token {
final StringBuilder name = new StringBuilder();
String pubSysKey = null;
final StringBuilder publicIdentifier = new StringBuilder();
final StringBuilder systemIdentifier = new StringBuilder();
boolean forceQuirks = false;
Expand All @@ -43,6 +44,7 @@ static final class Doctype extends Token {
@Override
Token reset() {
reset(name);
pubSysKey = null;
reset(publicIdentifier);
reset(systemIdentifier);
forceQuirks = false;
Expand All @@ -53,6 +55,10 @@ String getName() {
return name.toString();
}

/**
* Get the PUBLIC / SYSTEM keyword recorded on this doctype token.
* @return the stored keyword, or null if none has been set since the token was created or last reset
*/
String getPubSysKey() {
return pubSysKey;
}

String getPublicIdentifier() {
return publicIdentifier.toString();
}
Expand Down
8 changes: 6 additions & 2 deletions src/main/java/org/jsoup/parser/TokeniserState.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
package org.jsoup.parser;

import org.jsoup.nodes.DocumentType;

import java.util.Arrays;

/**
Expand Down Expand Up @@ -1189,9 +1191,11 @@ void read(Tokeniser t, CharacterReader r) {
else if (r.matches('>')) {
t.emitDoctypePending();
t.advanceTransition(Data);
} else if (r.matchConsumeIgnoreCase("PUBLIC")) {
} else if (r.matchConsumeIgnoreCase(DocumentType.PUBLIC_KEY)) {
t.doctypePending.pubSysKey = DocumentType.PUBLIC_KEY;
t.transition(AfterDoctypePublicKeyword);
} else if (r.matchConsumeIgnoreCase("SYSTEM")) {
} else if (r.matchConsumeIgnoreCase(DocumentType.SYSTEM_KEY)) {
t.doctypePending.pubSysKey = DocumentType.SYSTEM_KEY;
t.transition(AfterDoctypeSystemKeyword);
} else {
t.error(this);
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/org/jsoup/parser/XmlTreeBuilder.java
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ void insert(Token.Character characterToken) {
}

void insert(Token.Doctype d) {
DocumentType doctypeNode = new DocumentType(settings.normalizeTag(d.getName()), d.getPublicIdentifier(), d.getSystemIdentifier(), baseUri);
DocumentType doctypeNode = new DocumentType(settings.normalizeTag(d.getName()), d.getPubSysKey(), d.getPublicIdentifier(), d.getSystemIdentifier(), baseUri);
insertNode(doctypeNode);
}

Expand Down
29 changes: 29 additions & 0 deletions src/test/java/org/jsoup/nodes/DocumentTypeTest.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
package org.jsoup.nodes;

import org.jsoup.Jsoup;
import org.jsoup.parser.Parser;
import org.junit.Test;

import static org.junit.Assert.*;
Expand Down Expand Up @@ -38,4 +40,31 @@ public void constructorValidationOkWithBlankPublicAndSystemIds() {
DocumentType combo = new DocumentType("notHtml", "--public", "--system", "");
assertEquals("<!DOCTYPE notHtml PUBLIC \"--public\" \"--system\">", combo.outerHtml());
}

/**
 * Parses doctype declarations with both the HTML and the XML parser and checks the
 * re-serialized doctype node. The bare doctype is expected back as {@code <!doctype html>}
 * from the HTML parser and unchanged from the XML parser; the PUBLIC and SYSTEM variants
 * are expected back unchanged from both.
 */
@Test public void testRoundTrip() {
    String bare = "<!DOCTYPE html>";
    assertEquals("<!doctype html>", htmlOutput(bare));
    assertEquals(bare, xmlOutput(bare));

    // Declarations that must survive both parsers verbatim, checked in the original order.
    String[] unchangedByBothParsers = {
        "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">",
        "<!DOCTYPE html SYSTEM \"exampledtdfile.dtd\">",
        "<!DOCTYPE html SYSTEM \"about:legacy-compat\">"
    };
    for (String declaration : unchangedByBothParsers) {
        assertEquals(declaration, htmlOutput(declaration));
        assertEquals(declaration, xmlOutput(declaration));
    }
}

/**
 * Parses the input with the default (HTML) parser and serializes the document's first
 * child node, which is cast to a {@link DocumentType}.
 */
private String htmlOutput(String in) {
    return ((DocumentType) Jsoup.parse(in).childNode(0)).outerHtml();
}

/**
 * Parses the input with the XML parser (empty base URI) and serializes the document's
 * first child node.
 */
private String xmlOutput(String in) {
    final Parser xml = Parser.xmlParser();
    return Jsoup.parse(in, "", xml).childNode(0).outerHtml();
}
}

0 comments on commit c28e5bf

Please sign in to comment.