Skip to content

Commit

Permalink
Replaced whitespaces by "%20" in urls on redirects and .connect(String url)
Browse files Browse the repository at this point in the history
  • Loading branch information
Christian Schneider (X200) authored and jhy committed Nov 11, 2013
1 parent c1bdb9d commit c765b81
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 2 deletions.
10 changes: 8 additions & 2 deletions src/main/java/org/jsoup/helper/HttpConnection.java
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,12 @@ public static Connection connect(URL url) {
return con;
}

/**
 * Replaces every space character in a URL string with its percent-encoded form, {@code %20}.
 * Only the plain space character is rewritten; every other character is left untouched.
 *
 * @param url the URL text to encode; may be {@code null}
 * @return the URL with each space replaced by {@code %20}, or {@code null} if {@code url} was {@code null}
 */
private static String encodeUrl(String url) {
    if (url == null) {
        return null;
    }
    // Literal replacement: no regular expression is needed to swap a single fixed character sequence.
    return url.replace(" ", "%20");
}

private Connection.Request req;
private Connection.Response res;

Expand All @@ -50,7 +56,7 @@ public Connection url(URL url) {
public Connection url(String url) {
Validate.notEmpty(url, "Must supply a valid URL");
try {
req.url(new URL(url));
req.url(new URL(encodeUrl(url)));
} catch (MalformedURLException e) {
throw new IllegalArgumentException("Malformed URL: " + url, e);
}
Expand Down Expand Up @@ -447,7 +453,7 @@ else if (!req.ignoreHttpErrors())
if (needsRedirect && req.followRedirects()) {
req.method(Method.GET); // always redirect with a get. any data param from original req are dropped.
req.data().clear();
req.url(new URL(req.url(), res.header("Location")));
req.url(new URL(req.url(), encodeUrl(res.header("Location"))));
for (Map.Entry<String, String> cookie : res.cookies.entrySet()) { // add response cookies to request (for e.g. login posts)
req.cookie(cookie.getKey(), cookie.getValue());
}
Expand Down
14 changes: 14 additions & 0 deletions src/test/java/org/jsoup/integration/UrlConnectTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,13 @@ public void fetchURl() throws IOException {
assertTrue(doc.title().contains("Google"));
}

/**
 * Connects to a URL whose fragment contains a literal space and checks that the
 * fetched document's title contains "jsoup".
 */
@Test
public void fetchURIWithWihtespace() throws IOException {
    final String urlWithSpace = "http://try.jsoup.org/#with whitespaces";
    Document fetched = Jsoup.connect(urlWithSpace).get();
    assertTrue(fetched.title().contains("jsoup"));
}

@Test
public void fetchBaidu() throws IOException {
Connection.Response res = Jsoup.connect("http://www.baidu.com/").timeout(10*1000).execute();
Expand Down Expand Up @@ -140,6 +147,13 @@ public void followsRelativeRedirect() throws IOException {
assertTrue(doc.title().contains("HTML Tidy Online"));
}

/**
 * Fetches a short link that is expected to redirect to a target URL containing a
 * space, and checks that the resulting document's title contains "Google".
 */
@Test
public void followsRedirectsWithWithespaces() throws IOException {
    // The short link redirects to http://www.google.com/?q=white spaces
    final String shortLink = "http://tinyurl.com/kgofxl8";
    Document landed = Jsoup.connect(shortLink).get();
    assertTrue(landed.title().contains("Google"));
}

@Test
public void throwsExceptionOnError() {
String url = "http://direct.infohound.net/tools/404";
Expand Down

0 comments on commit c765b81

Please sign in to comment.