Skip to content

Commit

Permalink
Decode components from URL into URI
Browse files Browse the repository at this point in the history
This allows both escaped and un-escaped inputs to be correctly decoded then encoded.

Fixes #1902
  • Loading branch information
jhy committed Feb 24, 2023
1 parent da5e977 commit a96ebc9
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 3 deletions.
6 changes: 5 additions & 1 deletion CHANGES
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,18 @@ jsoup changelog
Release 1.16.1 [PENDING]
* Improvement: Calling Node.remove() on a node with no parent is now a no-op, vs a validation error.
<https://github.com/jhy/jsoup/issues/1898>

* Bugfix: Corrected support for ruby elements (<ruby>, <rp>, <rt>, and <rtc>) to current spec.
<https://github.com/jhy/jsoup/issues/1294>

* Bugfix: When using Node.before(node) or Node.after(node), if the incoming node was a sibling of the context node,
the incoming node may be inserted into the wrong relative location.
<https://github.com/jhy/jsoup/issues/1898>

* Bugfix: In Jsoup.connect(url), if the input URL had components that were already %-escaped, they would be escaped
a second time (double-encoded), causing errors when fetched.
<https://github.com/jhy/jsoup/issues/1902>

Release 1.15.4 [18-Feb-2023]
* Improvement: added the ability to escape CSS selectors (tags, IDs, classes) to match elements that don't follow
regular CSS syntax. For example, to match by classname <p class="one.two">, use document.select("p.one\\.two");
Expand Down
13 changes: 11 additions & 2 deletions src/main/java/org/jsoup/helper/HttpConnection.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import java.io.InputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.net.CookieManager;
import java.net.CookieStore;
import java.net.HttpURLConnection;
Expand All @@ -30,6 +31,7 @@
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.net.URLDecoder;
import java.net.URLEncoder;
import java.nio.Buffer;
import java.nio.ByteBuffer;
Expand Down Expand Up @@ -127,14 +129,21 @@ static URL encodeUrl(URL u) {
u = punyUrl(u);
try {
// run the URL through URI, so components are encoded
URI uri = new URI(u.getProtocol(), u.getUserInfo(), u.getHost(), u.getPort(), u.getPath(), u.getQuery(), u.getRef());
URI uri = new URI(
u.getProtocol(), decodePart(u.getUserInfo()), u.getHost(), u.getPort(),
decodePart(u.getPath()), decodePart(u.getQuery()), decodePart(u.getRef()));
return uri.toURL();
} catch (URISyntaxException | MalformedURLException e) {
} catch (URISyntaxException | MalformedURLException | UnsupportedEncodingException e) {
// give up and return the original input
return u;
}
}

@Nullable private static String decodePart(@Nullable String encoded) throws UnsupportedEncodingException {
if (encoded == null) return null;
return URLDecoder.decode(encoded, UTF_8.name());
}

/**
Convert an International URL to a Punycode URL.
@param url input URL that may include an international hostname
Expand Down
14 changes: 14 additions & 0 deletions src/test/java/org/jsoup/helper/HttpConnectionTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,20 @@ public void caseInsensitiveHeaders(Locale locale) {
assertEquals("https://test.com/foo%20bar/%5BOne%5D?q=white%20space#frag", url2.toExternalForm());
}

@Test void encodedUrlDoesntDoubleEncode() throws MalformedURLException {
URL url1 = new URL("https://test.com/foo bar/[One]?q=white space#frag ment");
URL url2 = HttpConnection.encodeUrl(url1);
URL url3 = HttpConnection.encodeUrl(url2);
assertEquals("https://test.com/foo%20bar/%5BOne%5D?q=white%20space#frag%20ment", url2.toExternalForm());
assertEquals("https://test.com/foo%20bar/%5BOne%5D?q=white%20space#frag%20ment", url3.toExternalForm());
}

@Test void connectToEncodedUrl() {
Connection connect = Jsoup.connect("https://example.com/a%20b%20c?query+string");
URL url = connect.request().url();
assertEquals("https://example.com/a%20b%20c?query%20string", url.toExternalForm());
}

@Test public void noUrlThrowsValidationError() throws IOException {
HttpConnection con = new HttpConnection();
boolean threw = false;
Expand Down

0 comments on commit a96ebc9

Please sign in to comment.