Skip to content

Commit

Permalink
Http.Pct: nail down encode's interface.
Browse files Browse the repository at this point in the history
  • Loading branch information
dbuenzli committed Nov 12, 2021
1 parent 88f71ec commit f11a625
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 19 deletions.
6 changes: 4 additions & 2 deletions B0.ml
Expand Up @@ -63,12 +63,14 @@ let webs_connector_lib =

let webs_cgi_lib =
let srcs = mod_srcs "webs_cgi" in
let requires = [unix; webs; webs_kit; webs_unix] in
let requires = [unix; webs; webs_kit; webs_connector; webs_unix] in
B0_ocaml.lib webs_cgi ~doc:"Webs CGI connector" ~srcs ~requires

let webs_httpc_lib =
let srcs = mod_srcs "webs_httpc" in
let requires = [threads; unix; webs; webs_kit; webs_unix; webs_tpool] in
let requires = [threads; unix; webs; webs_kit; webs_connector; webs_unix;
webs_tpool]
in
B0_ocaml.lib webs_httpc ~doc:"Webs HTTP/1.1 connector" ~srcs ~requires

let webs_cli_lib =
Expand Down
36 changes: 26 additions & 10 deletions src/webs.ml
Expand Up @@ -143,14 +143,23 @@ module Http = struct
end

module Pct = struct
let is_non_pct_pchar = function
(* see https://tools.ietf.org/html/rfc3986#section-3.3 *)

(* See https://tools.ietf.org/html/rfc3986 *)

(* [char_is_uri_component_verbatim c] is [true] iff [c] is an RFC 3986
   [unreserved] or [sub-delims] character. Any other character yields
   [false]. *)
let char_is_uri_component_verbatim c = match c with
| 'a' .. 'z' | 'A' .. 'Z' | '0' .. '9' -> true (* unreserved: ALPHA, DIGIT *)
| '-' | '.' | '_' | '~' -> true (* unreserved: marks *)
| '!' | '$' | '&' | '\'' | '(' | ')' -> true (* sub-delims *)
| '*' | '+' | ',' | ';' | '=' -> true (* sub-delims, continued *)
| _ -> false

let char_is_uri_verbatim = function
(* unreserved *)
| 'A' .. 'Z' | 'a' .. 'z' | '0' .. '9' | '-' | '.' | '_' | '~'
(* sub-delims *)
| '!' | '$' | '&' | '\'' | '(' | ')' | '*' | '+' | ',' | ';' | '='
(* *)
| ':' | '@' -> true
(* gen-delims *)
| ':' | '/' | '?' | '#' | '[' | ']' | '@' -> true
| _ -> false

let is_hexdig = function
Expand All @@ -166,9 +175,9 @@ module Http = struct
if i < 10 then Char.unsafe_chr (i + 0x30) else
Char.unsafe_chr (i + 0x37)

let encode_to_buffer b s =
let encode_to_buffer is_verbatim b s =
for k = 0 to String.length s - 1 do match s.[k] with
| c when is_non_pct_pchar c -> Buffer.add_char b c
| c when is_verbatim c -> Buffer.add_char b c
| c ->
let hi = (Char.code c lsr 4) land 0xF in
let lo = (Char.code c) land 0xF in
Expand All @@ -193,12 +202,16 @@ module Http = struct
| c -> Buffer.add_char b c; incr i
done

let encode s =
let encode what s =
(* XXX one day we should benchmark whether the scan first to determine
length and then use Bytes directly is faster – see
Query.pct_encode_space_as_plus) – one day. *)
let is_verbatim = match what with
| `Uri_component -> char_is_uri_component_verbatim
| `Uri -> char_is_uri_verbatim
in
let b = Buffer.create (String.length s * 2) in
encode_to_buffer b s;
encode_to_buffer is_verbatim b s;
Buffer.contents b

let decode s =
Expand Down Expand Up @@ -561,14 +574,17 @@ module Http = struct
let seg = decode_segment b ~first ~last:(i - 1) s in
let i = i + 1 in
loop (seg :: acc) b s ~first:i i
| c when c = '%' || Pct.is_non_pct_pchar c ->
| c when c = '%' || Pct.char_is_uri_component_verbatim c ->
loop acc b s ~first (i + 1)
| c -> Error (err_path_char c)
in
loop [] (Buffer.create 255) s 1 1

let buffer_encode_path b segs =
let add_seg seg = Buffer.add_char b '/'; Pct.encode_to_buffer b seg in
let add_seg seg =
Buffer.add_char b '/';
Pct.encode_to_buffer Pct.char_is_uri_component_verbatim b seg
in
List.iter add_seg segs

let encode segs =
Expand Down
25 changes: 18 additions & 7 deletions src/webs.mli
Expand Up @@ -91,13 +91,24 @@ module Http : sig
[application/x-www-form-urlencoded] which is slightly different.
The {!Query} module handles that. *)
module Pct : sig
val encode : string -> string
(** [encode s] is the percent-encoding of [s].
{b TODO.} Make that more subtle for now
we percent-encode what is not percent-encoded
{{:https://tools.ietf.org/html/rfc3986#section-3.3}[pchar]}
in RFC 3986. *)
val encode : [`Uri_component | `Uri] -> string -> string
(** [encode what s] is the percent-encoding of [s] according to
[what]:
{ul
{- [`Uri_component] percent-encodes anything but
{{:https://datatracker.ietf.org/doc/html/rfc3986#section-2.3}
[unreserved]} and
{{:https://datatracker.ietf.org/doc/html/rfc3986#section-2.2}
sub-delims} URI characters. In other words only
['a'..'z'], ['A'..'Z'], ['0'..'9'], ['-'], ['.'], ['_'], ['~']
and ['!'], ['$'], ['&'], ['\''], ['('], [')'],
['*'], ['+'], [','], [';'], ['='] are not percent-encoded.}
{- [`Uri] percent-encodes like [`Uri_component] except it also
preserves
{{:https://datatracker.ietf.org/doc/html/rfc3986#section-2.2}
gen-delims} URI characters. In other words in addition to those
characters above, [':'], ['/'], ['?'], ['#'], ['\['], ['\]'], ['@']
are not percent-encoded.}} *)

val decode : string -> string
(** [decode s] is the percent-encoding decode of [s]. *)
Expand Down

0 comments on commit f11a625

Please sign in to comment.