Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[1.0.x] Correctly url encode emoji in path segments #227

Merged
merged 2 commits into from
Dec 27, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ scala:
- 2.11.12
- 2.12.7
jdk:
- oraclejdk8
- openjdk8
matrix:
include:
- jdk: openjdk11
Expand Down
39 changes: 21 additions & 18 deletions core/src/main/scala/uri.scala
Original file line number Diff line number Diff line change
Expand Up @@ -50,30 +50,33 @@ object UriEncode {
// Characters permitted in a path: everything in `unreserved` followed by
// ':', '@', '&', '=', '+', '$' and ','.
def pchar = unreserved ++ List(':', '@', '&', '=', '+', '$', ',')
val segmentValid = (';' +: pchar).toSet
val segmentValid: Set[Char] = (';' +: pchar).toSet

private val validMarkers = (0 to segmentValid.max.toInt).map(i => segmentValid(i.toChar)).toArray
private def isValidChar(ch: Char) = (ch < validMarkers.length) && validMarkers(ch.toInt)
// There are likely more optimal ways of doing this calculation, however
// it seems unlikely that long path segments are often on the hot path
// of a request in such a way that they can't be cached. If that proves
// not to be true, then we can revisit.
// True when the byte, converted to a Char, is a member of `segmentValid`
// and can therefore be emitted without percent-encoding.
private def isValidChar(b: Byte) =
  segmentValid(b.toChar)

def path(pathSegment: String, encoding: String = "UTF-8") = {
if (pathSegment.forall(isValidChar)) {
val pathBytes = pathSegment.getBytes(encoding)

if (pathBytes.forall(isValidChar)) {
pathSegment
}
else {
} else {
val sb = new StringBuilder(pathSegment.length << 1)

pathSegment foreach { ch =>
if (isValidChar(ch)) {
sb.append(ch)
}
else {
ch.toString.getBytes(encoding) foreach { b =>
val hi = (b >>> 4) & 0xf
val lo = b & 0xf
sb.append('%')
.append((if (hi > 9) hi + '7' else hi + '0').toChar)
.append((if (lo > 9) lo + '7' else lo + '0').toChar)
}
pathBytes.foreach { b =>
if (isValidChar(b)) {
sb.append(b.toChar)
} else {
val hi = (b >>> 4) & 0xf
val lo = b & 0xf
sb.append('%')
.append((if (hi > 9) hi + '7' else hi + '0').toChar)
.append((if (lo > 9) lo + '7' else lo + '0').toChar)
}
}

Expand Down
11 changes: 8 additions & 3 deletions core/src/test/scala/uri.scala
Original file line number Diff line number Diff line change
@@ -1,21 +1,26 @@
package dispatch.spec

import org.scalacheck._
import org.scalacheck.Prop.BooleanOperators
import org.scalacheck.Prop._

object UriSpecification extends Properties("Uri") {
/** java.net.URLDecoder should *NOT* be used for testing URI segment decoding
* because it implements completely different functionality: query parameter decoding
*/
property("encode-decode") = Prop.forAll { (path: String) =>
property("Encodes and decodes basic strings") = Prop.forAll { (path: String) =>
  // Kept as a `def` so the URI is only built when the precondition holds;
  // inputs containing ':' are discarded rather than parsed.
  def decoded: String = new java.net.URI(dispatch.UriEncode.path(path)).getPath

  val hasColon = path.contains(":")
  !hasColon ==> (decoded == path) // else Prop.throws(classOf[java.net.URISyntaxException])
}

/** if there is nothing to escape, encoder must return original reference */
property("noop") = Prop.forAll(Gen.choose(0,100)) { (n: Int) =>
property("Does nothing if there's nothing to encode") = Prop.forAll(Gen.choose(0,100)) { (n: Int) =>
  // A run of 'A's never needs escaping, whatever its length (including empty).
  val path = "A" * n
  // `eq` is reference equality: the encoder must hand back the very same
  // String instance, not merely an equal copy.
  dispatch.UriEncode.path(path) eq path
}

property("Encodes emoji correctly") = forAll(Gen.const("unused")) { (ignored: String) =>
  // The generated value is not used; the property checks one fixed path
  // whose last characters lie outside the Basic Multilingual Plane.
  val emojiPath = "roma🇮🇹"
  val roundTripped = new java.net.URI(dispatch.UriEncode.path(emojiPath)).getPath
  roundTripped == emojiPath
}
}