Skip to content

Commit

Permalink
Encoding quotes in URL Canonicalizer
Browse files Browse the repository at this point in the history
  • Loading branch information
Satendra Tiwari committed Jul 10, 2014
1 parent 39d31e9 commit 897f85b
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 2 deletions.
2 changes: 1 addition & 1 deletion pom.xml
Expand Up @@ -4,7 +4,7 @@
<artifactId>crawler4j-indix</artifactId>
<packaging>jar</packaging>
<name>crawler4j</name>
<version>3.5.2-indix</version>
<version>3.5.3-indix</version>
<description>Open Source Web Crawler for Java</description>
<url>http://code.google.com/p/crawler4j/</url>
<licenses>
Expand Down
10 changes: 9 additions & 1 deletion src/main/java/edu/uci/ics/crawler4j/url/URLCanonicalizer.java
Expand Up @@ -176,12 +176,20 @@ private static String canonicalize(final List<Pair> params) {
sb.append(pair.getKey());
if (!pair.getValue().isEmpty()) {
sb.append('=');
sb.append(pair.getValue());
sb.append(encodeQuotes(pair.getValue()));
}
}
return sb.toString();
}

/**
 * Percent-encodes every double-quote character in the given string.
 *
 * <p>Each {@code "} is replaced with {@code %22}; all other characters are
 * left untouched.
 *
 * @param string the value to encode; may be {@code null}
 * @return the value with every double quote replaced by {@code %22}, or
 *         {@code null} if {@code string} is {@code null}
 */
private static String encodeQuotes(String string){
    // Explicit guard instead of a catch-all: a null input is the only way
    // String.replace can fail here, and null-in/null-out is preserved.
    if (string == null) {
        return null;
    }
    return string.replace("\"", "%22");
}

/**
* Percent-encode values according to RFC 3986. The built-in Java
* URLEncoder does not encode according to the RFC, so we make the extra
Expand Down

0 comments on commit 897f85b

Please sign in to comment.