Browse files

Fixed doctype tokeniser to allow whitespace between name and public identifier.
  • Loading branch information...
1 parent c98349a commit 70b2cf9a73e38bd3831a8e196c84556275c3f588 @jhy committed Aug 28, 2011
View
2 CHANGES
@@ -18,6 +18,8 @@ jsoup changelog
* Tweaked escaped entity detection in attributes to not treat &entity_... as an entity form.
<https://github.com/jhy/jsoup/issues/129>
+ * Fixed doctype tokeniser to allow whitespace between name and public identifier.
+
*** Release 1.6.1 [2011-Jul-02]
* Fixed Java 1.5 compatibility.
<https://github.com/jhy/jsoup/issues/103>
View
4 src/main/java/org/jsoup/parser/TokeniserState.java
@@ -1364,7 +1364,9 @@ void read(Tokeniser t, CharacterReader r) {
t.transition(Data);
return;
}
- if (r.matches('>')) {
+ if (r.matchesAny('\t', '\n', '\f', ' '))
+ r.advance(); // ignore whitespace
+ else if (r.matches('>')) {
t.emitDoctypePending();
t.advanceTransition(Data);
} else if (r.matchConsumeIgnoreCase("PUBLIC")) {
View
8 src/test/java/org/jsoup/parser/ParserTest.java
@@ -623,4 +623,12 @@
Document doc = Jsoup.parse("<a \n href=\"one\" \r\n id=\"two\" \f >");
assertEquals("<a href=\"one\" id=\"two\"></a>", doc.body().html());
}
+
+ @Test public void handlesWhitespaceInoDocType() {
+ String html = "<!DOCTYPE html\n" +
+ " PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\"\n" +
+ " \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">";
+ Document doc = Jsoup.parse(html);
+ assertEquals("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">", doc.childNode(0).outerHtml());
+ }
}

0 comments on commit 70b2cf9

Please sign in to comment.