Skip to content

Commit

Permalink
Merge pull request #2830 from IndiscriminateCoding/url-coding-utils
Browse files Browse the repository at this point in the history
Move Url decoding/encoding to Uri companion object
  • Loading branch information
rossabaker committed Sep 19, 2019
2 parents d9bf89e + d778e00 commit e75815a
Show file tree
Hide file tree
Showing 10 changed files with 230 additions and 246 deletions.
4 changes: 4 additions & 0 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,10 @@ lazy val core = libraryProject("core")
vault,
scalaReflect(scalaOrganization.value, scalaVersion.value) % "provided",
),
mimaBinaryIssueFilters ++= Seq(
"org.http4s.util.UrlCodingUtils",
"org.http4s.util.UrlCodingUtils$"
).map(ProblemFilters.exclude[MissingClassProblem]),
unmanagedSourceDirectories in Compile ++= {
(unmanagedSourceDirectories in Compile).value.map { dir =>
val sv = scalaVersion.value
Expand Down
3 changes: 1 addition & 2 deletions client/src/main/scala/org/http4s/client/oauth1/oauth1.scala
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ import java.nio.charset.StandardCharsets
import javax.crypto
import org.http4s.headers.Authorization
import org.http4s.syntax.string._
import org.http4s.util.UrlCodingUtils
import scala.collection.immutable
import scala.collection.mutable.ListBuffer

Expand Down Expand Up @@ -105,7 +104,7 @@ package object oauth1 {
}

private[oauth1] def encode(str: String): String =
UrlCodingUtils.urlEncode(str, spaceIsPlus = false, toSkip = UrlCodingUtils.Unreserved)
Uri.encode(str, spaceIsPlus = false, toSkip = Uri.Unreserved)

private[oauth1] def getUserParams[F[_]](req: Request[F])(
implicit F: MonadError[F, Throwable],
Expand Down
7 changes: 3 additions & 4 deletions core/src/main/scala/org/http4s/Query.scala
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import org.http4s.Query._
import org.http4s.internal.CollectionCompat
import org.http4s.internal.parboiled2.CharPredicate
import org.http4s.parser.QueryParser
import org.http4s.util.{Renderable, UrlCodingUtils, Writer}
import org.http4s.util.{Renderable, Writer}
import scala.collection.immutable

/** Collection representation of a query string
Expand Down Expand Up @@ -70,7 +70,7 @@ final class Query private (val pairs: Vector[KeyValue]) extends QueryOps with Re
override def render(writer: Writer): writer.type = {
var first = true
def encode(s: String) =
UrlCodingUtils.urlEncode(s, spaceIsPlus = false, toSkip = NoEncode)
Uri.encode(s, spaceIsPlus = false, toSkip = NoEncode)
pairs.foreach {
case (n, None) =>
if (!first) writer.append('&')
Expand Down Expand Up @@ -130,8 +130,7 @@ object Query {
* avoid percent-encoding those characters."
* -- http://tools.ietf.org/html/rfc3986#section-3.4
*/
private val NoEncode: CharPredicate =
UrlCodingUtils.Unreserved ++ "?/"
private val NoEncode: CharPredicate = Uri.Unreserved ++ "?/"

def apply(xs: (String, Option[String])*): Query =
new Query(xs.toVector)
Expand Down
130 changes: 128 additions & 2 deletions core/src/main/scala/org/http4s/Uri.scala
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,10 @@ package org.http4s

import cats._
import cats.implicits.{catsSyntaxEither => _, _}
import java.nio.{ByteBuffer, CharBuffer}
import java.nio.charset.{Charset => JCharset}
import java.nio.charset.StandardCharsets
import org.http4s.internal.parboiled2.CharPredicate
import org.http4s.Uri._
import org.http4s.internal.parboiled2.CharPredicate.{Alpha, Digit}
import org.http4s.internal.parboiled2.{Parser => PbParser}
Expand Down Expand Up @@ -39,7 +42,7 @@ final case class Uri(
def withoutFragment: Uri = copy(fragment = Option.empty[Fragment])

def /(newSegment: Path): Uri = {
val encoded = UrlCodingUtils.pathEncode(newSegment)
val encoded = pathEncode(newSegment)
val newPath =
if (path.isEmpty || path.last != '/') s"$path/$encoded"
else s"$path$encoded"
Expand Down Expand Up @@ -105,7 +108,7 @@ final case class Uri(
writer << path
if (query.nonEmpty) writer << '?' << query
fragment.foreach { f =>
writer << '#' << UrlCodingUtils.urlEncode(f, spaceIsPlus = false)
writer << '#' << encode(f, spaceIsPlus = false)
}
writer
}
Expand Down Expand Up @@ -374,6 +377,129 @@ object Uri {
case n => b.setLength(n)
}

/**
* Taken from https://github.com/scalatra/rl/blob/v0.4.10/core/src/main/scala/rl/UrlCodingUtils.scala
* Copyright (c) 2011 Mojolly Ltd.
*/
// Unreserved URI characters: ASCII letters and digits plus '-', '_', '.' and '~'.
private[http4s] val Unreserved =
CharPredicate.AlphaNum ++ "-_.~"

// Characters exempt from percent-encoding by default: the unreserved set plus
// "!$&'()*+,;=:/?@". Used as the default `toSkip` of `encode`, and consulted by
// `decode` when copying characters that appear unescaped in its input.
private val toSkip =
Unreserved ++ "!$&'()*+,;=:/?@"

// scalastyle:off magic.number
// Uppercase hexadecimal digit for each value 0 to 15, indexed by that value;
// `encode` looks up the high and low nibble of every escaped byte here.
private val HexUpperCaseChars = (0 until 16).map { i =>
Character.toUpperCase(Character.forDigit(i, 16))
}
// scalastyle:on magic.number

/**
 * Percent-encodes a string. Depending on the parameters, this method is
 * appropriate for URI or URL form encoding. Any resulting percent-encodings
 * are normalized to uppercase.
 *
 * The input is first converted to bytes with `charset`; every byte is then
 * either copied through, turned into `'+'`, or written as `%XX`.
 *
 * @param toEncode the string to encode
 * @param charset the charset to use for characters that are percent encoded
 * @param spaceIsPlus if space is not skipped, determines whether it will be
 * rendered as a `"+"` or a percent-encoding according to `charset`.
 * @param toSkip a predicate of characters exempt from encoding. In typical
 * use, this is composed of all Unreserved URI characters and sometimes a
 * subset of Reserved URI characters.
 * @return the percent-encoded string
 */
def encode(
    toEncode: String,
    charset: JCharset = StandardCharsets.UTF_8,
    spaceIsPlus: Boolean = false,
    toSkip: Char => Boolean = toSkip): String = {
  val bytes = charset.encode(toEncode)
  // Worst case every byte becomes a three-character "%XX" escape.
  val encoded = new java.lang.StringBuilder(bytes.remaining() * 3)
  while (bytes.hasRemaining) {
    val ch = bytes.get().toChar
    if (toSkip(ch)) {
      encoded.append(ch)
    } else if (spaceIsPlus && ch == ' ') {
      encoded.append('+')
    } else {
      // Only the low eight bits matter: high nibble first, then low nibble.
      val high = HexUpperCaseChars((ch >> 4) & 0xF)
      val low = HexUpperCaseChars(ch & 0xF)
      encoded.append('%').append(high).append(low)
    }
  }
  encoded.toString
}

// Characters left unescaped by `pathEncode`: the unreserved set plus
// ":@!$&'()*+,;=". Note that '/' is absent, so it is always percent-encoded.
private val SkipEncodeInPath =
  Unreserved ++ ":@!$&'()*+,;="

/**
 * Percent-encodes a string using the path skip set, never rendering a space
 * as `'+'`.
 *
 * @param s the string to encode
 * @param charset the charset to use for characters that are percent encoded
 * @return the percent-encoded string
 */
def pathEncode(s: String, charset: JCharset = StandardCharsets.UTF_8): String =
  encode(toEncode = s, charset = charset, spaceIsPlus = false, toSkip = SkipEncodeInPath)

/**
 * Percent-decodes a string.
 *
 * Malformed input is tolerated rather than rejected: a `'%'` that is not
 * followed by two hexadecimal digits is emitted as a literal `'%'`.
 *
 * Characters that appear unescaped in the input are re-encoded with `charset`
 * so that partially decoded strings are preserved. A surrogate pair is encoded
 * as a single unit, so unescaped supplementary characters (for example emoji)
 * are preserved as well.
 *
 * @param toDecode the string to decode
 * @param charset the charset of percent-encoded characters
 * @param plusIsSpace true if `'+'` is to be interpreted as a `' '`
 * @param toSkip a predicate of characters whose percent-encoded form
 * is left percent-encoded. Almost certainly should be left empty.
 * @return the decoded string
 */
def decode(
    toDecode: String,
    charset: JCharset = StandardCharsets.UTF_8,
    plusIsSpace: Boolean = false,
    toSkip: Char => Boolean = Function.const(false)): String = {
  val in = CharBuffer.wrap(toDecode)
  // reserve enough space for 3-byte UTF-8 characters. 4-byte characters are represented
  // as surrogate pairs of characters, and will get a luxurious 6 bytes of space.
  val out = ByteBuffer.allocate(in.remaining() * 3)
  while (in.hasRemaining) {
    val mark = in.position()
    val c = in.get()
    if (c == '%') {
      if (in.remaining() >= 2) {
        val xc = in.get()
        val yc = in.get()
        // scalastyle:off magic.number
        val x = Character.digit(xc, 0x10)
        val y = Character.digit(yc, 0x10)
        // scalastyle:on magic.number
        if (x != -1 && y != -1) {
          val oo = (x << 4) + y
          if (!toSkip(oo.toChar)) {
            out.put(oo.toByte)
          } else {
            // The caller asked for this character to stay escaped: copy the
            // escape sequence through unchanged.
            out.put('%'.toByte)
            out.put(xc.toByte)
            out.put(yc.toByte)
          }
        } else {
          // Not a valid escape: keep the '%' as a literal and rewind so the
          // two characters after it are processed normally.
          out.put('%'.toByte)
          in.position(mark + 1)
        }
      } else {
        // This is an invalid encoding. Fail gracefully by treating the '%' as
        // a literal. The remaining input is copied one byte per character.
        out.put(c.toByte)
        while (in.hasRemaining) out.put(in.get().toByte)
      }
    } else if (c == '+' && plusIsSpace) {
      out.put(' '.toByte)
    } else if (this.toSkip(c)) {
      // normally `out.put(c.toByte)` would be enough since the url is %-encoded,
      // however there are cases where a string can be partially decoded
      // so we have to make sure the non us-ascii chars get preserved properly.
      out.put(c.toByte)
    } else if (Character.isHighSurrogate(c) &&
      in.hasRemaining &&
      Character.isLowSurrogate(in.get(in.position()))) {
      // A supplementary character spans two chars. Encoding each half on its
      // own is malformed input that the charset replaces, so consume the low
      // surrogate now and encode the pair together.
      val low = in.get()
      out.put(charset.encode(new String(Array(c, low))))
    } else {
      out.put(charset.encode(String.valueOf(c)))
    }
  }
  out.flip()
  charset.decode(out).toString
}

/**
* Literal syntax for URIs. Invalid or non-literal arguments are rejected
* at compile time.
Expand Down
8 changes: 2 additions & 6 deletions core/src/main/scala/org/http4s/UrlForm.scala
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import cats.implicits.{catsSyntaxEither => _, _}
import org.http4s.headers._
import org.http4s.internal.CollectionCompat
import org.http4s.parser._
import org.http4s.util._
import org.http4s.Uri._
import scala.io.Codec

class UrlForm private (val values: Map[String, Chain[String]]) extends AnyVal {
Expand Down Expand Up @@ -128,11 +128,7 @@ object UrlForm {
/** Encode the [[UrlForm]] into a `String` using the provided `Charset` */
def encodeString(charset: Charset)(urlForm: UrlForm): String = {
def encode(s: String): String =
UrlCodingUtils.urlEncode(
s,
charset.nioCharset,
spaceIsPlus = true,
toSkip = UrlCodingUtils.Unreserved)
Uri.encode(s, charset.nioCharset, spaceIsPlus = true, toSkip = Uri.Unreserved)

val sb = new StringBuilder(urlForm.values.size * 20)
urlForm.values.foreach {
Expand Down
3 changes: 1 addition & 2 deletions core/src/main/scala/org/http4s/parser/QueryParser.scala
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ package parser
import cats.implicits._
import java.io.UnsupportedEncodingException
import java.nio.CharBuffer
import org.http4s.util.UrlCodingUtils
import scala.annotation.switch
import scala.collection.immutable.BitSet
import scala.collection.mutable.Builder
Expand Down Expand Up @@ -97,7 +96,7 @@ private[http4s] class QueryParser(
}

private def decodeParam(str: String): String =
try UrlCodingUtils.urlDecode(str, codec.charSet, plusIsSpace = true)
try Uri.decode(str, codec.charSet, plusIsSpace = true)
catch {
case _: IllegalArgumentException => ""
case _: UnsupportedEncodingException => ""
Expand Down
132 changes: 0 additions & 132 deletions core/src/main/scala/org/http4s/util/UrlCoding.scala

This file was deleted.

Loading

0 comments on commit e75815a

Please sign in to comment.