diff --git a/src/main/java/org/jsoup/parser/CharacterReader.java b/src/main/java/org/jsoup/parser/CharacterReader.java index 16f742ee07..d119803c76 100644 --- a/src/main/java/org/jsoup/parser/CharacterReader.java +++ b/src/main/java/org/jsoup/parser/CharacterReader.java @@ -16,7 +16,7 @@ public final class CharacterReader { static final char EOF = (char) -1; private static final int maxStringCacheLen = 12; static final int maxBufferLen = 1024 * 32; // visible for testing - private static final int readAheadLimit = (int) (maxBufferLen * 0.75); + static final int readAheadLimit = (int) (maxBufferLen * 0.75); // visible for testing private static final int minReadAheadLen = 1024; // the minimum mark length supported. No HTML entities can be larger than this. private final char[] charBuf; diff --git a/src/main/java/org/jsoup/parser/TokeniserState.java b/src/main/java/org/jsoup/parser/TokeniserState.java index 544c0b5892..ab14f1d5da 100644 --- a/src/main/java/org/jsoup/parser/TokeniserState.java +++ b/src/main/java/org/jsoup/parser/TokeniserState.java @@ -161,8 +161,8 @@ void read(Tokeniser t, CharacterReader r) { t.transition(SelfClosingStartTag); break; case '<': // NOTE: out of spec, but clear author intent - t.error(this); r.unconsume(); + t.error(this); // intended fall through to next > case '>': t.emitTagPending(); @@ -572,17 +572,17 @@ void read(Tokeniser t, CharacterReader r) { t.transition(SelfClosingStartTag); break; case '<': // NOTE: out of spec, but clear (spec has this as a part of the attribute name) - t.error(this); r.unconsume(); + t.error(this); // intended fall through as if > case '>': t.emitTagPending(); t.transition(Data); break; case nullChar: + r.unconsume(); t.error(this); t.tagPending.newAttribute(); - r.unconsume(); t.transition(AttributeName); break; case eof: @@ -880,8 +880,8 @@ void read(Tokeniser t, CharacterReader r) { t.transition(Data); break; default: - t.error(this); r.unconsume(); + t.error(this); t.transition(BeforeAttributeName); } @@ -901,8 +901,8 @@ void read(Tokeniser t, CharacterReader r) { t.transition(Data); break; default: - t.error(this); r.unconsume(); + t.error(this); t.transition(BeforeAttributeName); } } diff --git a/src/test/java/org/jsoup/parser/ParserIT.java b/src/test/java/org/jsoup/parser/ParserIT.java new file mode 100644 index 0000000000..ca0589d3c7 --- /dev/null +++ b/src/test/java/org/jsoup/parser/ParserIT.java @@ -0,0 +1,23 @@ +package org.jsoup.parser; + +import org.junit.Test; + +/** + * Longer running Parser tests. + */ + +public class ParserIT { + @Test + public void testIssue1251() { + // https://github.com/jhy/jsoup/issues/1251 + StringBuilder str = new StringBuilder("

Two
", TextUtil.stripNewlines(doc.body().html())); } + + @Test + public void testUnconsumeAtBufferBoundary() { + String triggeringSnippet = "