Skip to content

Commit 3676b13

Browse files
committed
Merge branch 'master' into pr/988
2 parents 1791ef1 + 45c5499 commit 3676b13

File tree

11 files changed

+104
-12
lines changed

11 files changed

+104
-12
lines changed

CHANGES

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,21 @@ jsoup changelog
66
<https://github.com/jhy/jsoup/issues/406>
77
<https://github.com/jhy/jsoup/issues/965>
88

9+
* Improvement: added support for Deflate encoding.
10+
<https://github.com/jhy/jsoup/pull/982>
11+
12+
* Improvement: when parsing <pre> tags, skip the first newline if present.
13+
<https://github.com/jhy/jsoup/issues/825>
14+
15+
* Bugfix: "Mark has been invalidated" exception was thrown when parsing some URLs on Android <= 6.
16+
<https://github.com/jhy/jsoup/issues/990>
17+
18+
* Bugfix: The Element.text() for <div>One</div>Two was "OneTwo", not "One Two".
19+
<https://github.com/jhy/jsoup/issues/812>
20+
21+
* Bugfix: boolean attributes with empty string values were not collapsing in HTML output.
22+
<https://github.com/jhy/jsoup/issues/985>
23+
924
*** Release 1.11.2 [2017-Nov-19]
1025
* Improvement: added a new pseudo selector :matchText, which allows text nodes to match as if they were elements.
1126
This enables finding text that is only marked by a "br" tag, for example.

src/main/java/org/jsoup/helper/DataUtil.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -99,8 +99,8 @@ static Document parseInputStream(InputStream input, String charsetName, String b
9999
boolean fullyRead = false;
100100

101101
// read the start of the stream and look for a BOM or meta charset
102-
input.mark(firstReadBufferSize);
103-
ByteBuffer firstBytes = readToByteBuffer(input, firstReadBufferSize - 1); // -1 because we read one more to see if completed
102+
input.mark(bufferSize);
103+
ByteBuffer firstBytes = readToByteBuffer(input, firstReadBufferSize - 1); // -1 because we read one more to see if completed. First read is < buffer size, so the mark can't be invalidated.
104104
fullyRead = input.read() == -1;
105105
input.reset();
106106

src/main/java/org/jsoup/helper/HttpConnection.java

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,8 @@
4646
import java.util.Map;
4747
import java.util.regex.Pattern;
4848
import java.util.zip.GZIPInputStream;
49+
import java.util.zip.Inflater;
50+
import java.util.zip.InflaterInputStream;
4951

5052
import static org.jsoup.Connection.Method.HEAD;
5153
import static org.jsoup.internal.Normalizer.lowerCase;
@@ -781,8 +783,11 @@ else if (methodHasBody)
781783
if (conn.getContentLength() != 0 && req.method() != HEAD) { // -1 means unknown, chunked. sun throws an IO exception on 500 response with no content when trying to read body
782784
res.bodyStream = null;
783785
res.bodyStream = conn.getErrorStream() != null ? conn.getErrorStream() : conn.getInputStream();
784-
if (res.hasHeaderWithValue(CONTENT_ENCODING, "gzip"))
786+
if (res.hasHeaderWithValue(CONTENT_ENCODING, "gzip")) {
785787
res.bodyStream = new GZIPInputStream(res.bodyStream);
788+
} else if (res.hasHeaderWithValue(CONTENT_ENCODING, "deflate")) {
789+
res.bodyStream = new InflaterInputStream(res.bodyStream, new Inflater(true));
790+
}
786791
res.bodyStream = ConstrainableInputStream
787792
.wrap(res.bodyStream, DataUtil.bufferSize, req.maxBodySize())
788793
.timeout(startTime, req.timeout())

src/main/java/org/jsoup/nodes/Attribute.java

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -159,11 +159,10 @@ protected final boolean shouldCollapseAttribute(Document.OutputSettings out) {
159159
return shouldCollapseAttribute(key, val, out);
160160
}
161161

162-
protected static boolean shouldCollapseAttribute(String key, String val, Document.OutputSettings out) {
163-
// todo: optimize
164-
return (val == null || "".equals(val) || val.equalsIgnoreCase(key))
165-
&& out.syntax() == Document.OutputSettings.Syntax.html
166-
&& isBooleanAttribute(key);
162+
protected static boolean shouldCollapseAttribute(final String key, final String val, final Document.OutputSettings out) {
163+
return (
164+
out.syntax() == Document.OutputSettings.Syntax.html &&
165+
(val == null || ("".equals(val) || val.equalsIgnoreCase(key)) && Attribute.isBooleanAttribute(key)));
167166
}
168167

169168
/**

src/main/java/org/jsoup/nodes/Attributes.java

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -316,9 +316,7 @@ final void html(final Appendable accum, final Document.OutputSettings out) throw
316316
accum.append(' ').append(key);
317317

318318
// collapse checked=null, checked="", checked=checked; write out others
319-
if (!(out.syntax() == Document.OutputSettings.Syntax.html
320-
&& (val == null || val.equals(key) && Attribute.isBooleanAttribute(key)))) {
321-
319+
if (!Attribute.shouldCollapseAttribute(key, val, out)) {
322320
accum.append("=\"");
323321
Entities.escape(accum, val == null ? EmptyString : val, out, true, false, false);
324322
accum.append('"');

src/main/java/org/jsoup/nodes/Element.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1038,6 +1038,13 @@ public void head(Node node, int depth) {
10381038
}
10391039

10401040
public void tail(Node node, int depth) {
1041+
// make sure there is a space between a block tag and an immediately following text node: <div>One</div>Two should be "One Two".
1042+
if (node instanceof Element) {
1043+
Element element = (Element) node;
1044+
if (element.isBlock() && (node.nextSibling() instanceof TextNode) && !TextNode.lastCharIsWhitespace(accum))
1045+
accum.append(' ');
1046+
}
1047+
10411048
}
10421049
}, this);
10431050
return accum.toString().trim();

src/main/java/org/jsoup/parser/HtmlTreeBuilderState.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -379,7 +379,7 @@ boolean process(Token t, HtmlTreeBuilder tb) {
379379
tb.processEndTag("p");
380380
}
381381
tb.insert(startTag);
382-
// todo: ignore LF if next token
382+
tb.reader.matchConsume("\n"); // ignore LF if next token
383383
tb.framesetOk(false);
384384
} else if (name.equals("form")) {
385385
if (tb.getFormElement() != null) {

src/test/java/org/jsoup/integration/ConnectTest.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import org.jsoup.Connection;
44
import org.jsoup.Jsoup;
5+
import org.jsoup.integration.servlets.Deflateservlet;
56
import org.jsoup.integration.servlets.EchoServlet;
67
import org.jsoup.integration.servlets.HelloServlet;
78
import org.jsoup.integration.servlets.SlowRider;
@@ -366,4 +367,13 @@ public void multiCookieSet() throws IOException {
366367
Document doc = Jsoup.connect(echoUrl).cookies(cookies).get();
367368
assertEquals("token=asdfg123; uid=jhy", ihVal("Cookie", doc));
368369
}
370+
371+
@Test
372+
public void supportsDeflate() throws IOException {
373+
Connection.Response res = Jsoup.connect(Deflateservlet.Url).execute();
374+
assertEquals("deflate", res.header("Content-Encoding"));
375+
376+
Document doc = res.parse();
377+
assertEquals("Hello, World!", doc.selectFirst("p").text());
378+
}
369379
}
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
package org.jsoup.integration.servlets;
2+
3+
import org.jsoup.integration.TestServer;
4+
5+
import javax.servlet.ServletException;
6+
import javax.servlet.http.HttpServletRequest;
7+
import javax.servlet.http.HttpServletResponse;
8+
import java.io.IOException;
9+
import java.nio.charset.StandardCharsets;
10+
import java.util.zip.Deflater;
11+
import java.util.zip.DeflaterOutputStream;
12+
13+
public class Deflateservlet extends BaseServlet {
14+
public static final String Url = TestServer.map(Deflateservlet.class);
15+
16+
@Override
17+
protected void doGet(HttpServletRequest req, HttpServletResponse res) throws ServletException, IOException {
18+
res.setContentType(TextHtml);
19+
res.setStatus(HttpServletResponse.SC_OK);
20+
res.setHeader("Content-Encoding", "deflate");
21+
22+
String doc = "<p>Hello, World!<p>That should be enough, right?<p>Hello, World!<p>That should be enough, right?";
23+
24+
DeflaterOutputStream stream = new DeflaterOutputStream(
25+
res.getOutputStream(),
26+
new Deflater(Deflater.BEST_COMPRESSION, true)); // true = nowrap: omit the zlib header and checksum
27+
28+
stream.write(doc.getBytes(StandardCharsets.UTF_8));
29+
stream.close();
30+
}
31+
32+
// allow the servlet to run as a main program, for local testing
33+
public static void main(String[] args) {
34+
TestServer.start();
35+
System.out.println(Url);
36+
}
37+
}

src/test/java/org/jsoup/nodes/ElementTest.java

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1310,4 +1310,18 @@ public void testRemovingEmptyClassAttributeWhenLastClassRemoved() {
13101310
assertFalse(doc.body().html().contains("class=\"\""));
13111311
}
13121312

1313+
@Test
1314+
public void booleanAttributeOutput() {
1315+
Document doc = Jsoup.parse("<img src=foo noshade='' nohref async=async autofocus=false>");
1316+
Element img = doc.selectFirst("img");
1317+
1318+
assertEquals("<img src=\"foo\" noshade nohref async autofocus=\"false\">", img.outerHtml());
1319+
}
1320+
1321+
@Test
1322+
public void textHasSpaceAfterBlockTags() {
1323+
Document doc = Jsoup.parse("<div>One</div>Two");
1324+
assertEquals("One Two", doc.text());
1325+
}
1326+
13131327
}

0 commit comments

Comments
 (0)