diff --git a/CHANGES b/CHANGES index b2eb976d6e..4673c1d025 100644 --- a/CHANGES +++ b/CHANGES @@ -11,6 +11,10 @@ jsoup changelog * Improvement: ensure HTTP keepalives work when fetching content via body() and bodyAsBytes(). + * Improvement: set the default max body size in Jsoup.Connection to 2MB (up from 1MB) so fewer people get trimmed + content if they have not set it, but still in sensible bounds. Also updated the default user-agent to improve + default compatibility. + * Bugfix: on pages fetch by Jsoup.Connection, a "Mark Invalid" exception might be incorrectly thrown, or the page may miss some data. This occurred on larger pages when the file transfer was chunked, an an invalid HTML entity happened to cross a chunk boundary. diff --git a/src/main/java/org/jsoup/Connection.java b/src/main/java/org/jsoup/Connection.java index 9f4fe63fdd..12d4a615da 100644 --- a/src/main/java/org/jsoup/Connection.java +++ b/src/main/java/org/jsoup/Connection.java @@ -98,8 +98,10 @@ public final boolean hasBody() { /** * Set the maximum bytes to read from the (uncompressed) connection into the body, before the connection is closed, - * and the input truncated. The default maximum is 1MB. A max size of zero is treated as an infinite amount (bounded - * only by your patience and the memory available on your machine). + * and the input truncated (i.e. the body content will be trimmed). The default maximum is 2MB. A max size of + * 0 is treated as an infinite amount (bounded only by your patience and the memory available on your + * machine). + * * @param bytes number of bytes to read from the input before truncating * @return this Connection, for chaining */ diff --git a/src/main/java/org/jsoup/helper/HttpConnection.java b/src/main/java/org/jsoup/helper/HttpConnection.java index aad10ab770..ff51252eea 100644 --- a/src/main/java/org/jsoup/helper/HttpConnection.java +++ b/src/main/java/org/jsoup/helper/HttpConnection.java @@ -57,7 +57,7 @@ public class HttpConnection implements Connection { * vs in jsoup, which would otherwise default to {@code Java}. So by default, use a desktop UA. */ public static final String DEFAULT_UA = - "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.143 Safari/537.36"; + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36"; private static final String USER_AGENT = "User-Agent"; public static final String CONTENT_TYPE = "Content-Type"; public static final String MULTIPART_FORM_DATA = "multipart/form-data"; @@ -551,7 +551,7 @@ public static class Request extends HttpConnection.Base impl Request() { timeoutMilliseconds = 30000; // 30 seconds - maxBodySizeBytes = 1024 * 1024; // 1MB + maxBodySizeBytes = 1024 * 1024 * 2; // 2MB followRedirects = true; data = new ArrayList<>(); method = Method.GET;