Permalink
Browse files

Escape XHP content

Summary:
Text in XHP bodies and attributes should be processed to decode HTML entities; however, escape sequences in that text are not handled.

Note: in the existing code, lowering conflates several cases that should be processed differently, e.g.
```
<a attr="\t &amp;"/>
```
and
```
<a attr={"\t &amp;"}>
```
are both turned into
```
Xml { attributes = String("\t") }
```
To handle this correctly, decoding of HTML entities is moved into lowering.

Reviewed By: oulgen

Differential Revision: D7149685

fbshipit-source-id: 44274c0cc03e0c25b35b6322770d93162b417170
  • Loading branch information...
vladima authored and hhvm-bot committed Mar 4, 2018
1 parent d6b0a48 commit d3ab762207d963df9ff073249f9e06659df3e3f3
@@ -1385,16 +1385,15 @@ and emit_xhp env p id attributes children =
let create_spread p id = (p, "...$" ^ string_of_int(id)) in
let convert_attr (spread_id, attrs) = function
| A.Xhp_simple (name, v) ->
let attr = (A.SFlit name, Html_entities.decode_expr v) in
let attr = (A.SFlit name, v) in
(spread_id, attr::attrs)
| A.Xhp_spread e ->
let (p, _) = e in
let attr = (A.SFlit (create_spread p spread_id), Html_entities.decode_expr e) in
let attr = (A.SFlit (create_spread p spread_id), e) in
(spread_id + 1, attr::attrs) in
let (_, attributes) = List.fold_left ~f:convert_attr ~init:(0, []) attributes in
let attribute_map = p, A.Shape (List.rev attributes) in
let dec_children = List.map ~f:Html_entities.decode_expr children in
let children_vec = p, A.Varray dec_children in
let children_vec = p, A.Varray children in
let filename = p, A.Id (p, "__FILE__") in
let line = p, A.Id (p, "__LINE__") in
let renamed_id = rename_xhp id in
@@ -422,15 +422,16 @@ let unempty_str = function
| "''" | "\"\"" -> ""
| s -> s
(* Runs [s] through [Php_escaping.unescape_double] and then through
   [unempty_str]. *)
let unesc_dbl s = s |> Php_escaping.unescape_double |> unempty_str
(* Strips the surrounding double quotes from [s].

   When [s] consists of optional leading whitespace followed by a
   double-quoted section, the text between the opening quote and the last
   double quote is returned (newlines included). Otherwise [s] is returned
   unchanged. *)
let get_quoted_content s =
  let quoted = Str.regexp "[ \t\n\r\012]*\"\\(\\(.\\|\n\\)*\\)\"" in
  if not (Str.string_match quoted s 0) then s
  else Str.matched_group 1 s
(* Replaces every run of whitespace (space, tab, newline, carriage return,
   form feed) in [s] with a single space. *)
let unesc_xhp s =
  let open Str in
  global_replace (regexp "[ \t\n\r\012]+") " " s
(* Extracts the content between the double quotes of an XHP attribute
   string and passes it through [unesc_dbl]; input that does not match the
   quoted pattern is passed to [unesc_dbl] as-is.

   NOTE(review): this span appears to show both the earlier inline-regex
   body and the later one-line body that delegates to [get_quoted_content];
   presumably only one of them belongs in the real file - confirm against
   the actual source. *)
let unesc_xhp_attr s =
let open Str in
unesc_dbl @@
if string_match (regexp "[ \t\n\r\012]*\"\\(\\(.\\|\n\\)*\\)\"") s 0
then matched_group 1 s
else s
unesc_dbl @@ get_quoted_content s
type suspension_kind =
| SKSync
@@ -1360,6 +1361,20 @@ and pExpr ?location:(location=TopLevel) : expr parser = fun node env ->
in
let pEmbedded escaper node env =
match syntax node with
| Token { Token.kind = TK.XHPStringLiteral; _ }
when env.codegen ->
let p = pPos node env in
(* for XHP string literals (attribute values) just extract
value from quotes and decode HTML entities *)
let text =
Html_entities.decode @@ get_quoted_content (full_text node) in
p, String (p, text)
| Token { Token.kind = TK.XHPBody; _ }
when env.codegen ->
let p = pPos node env in
(* for XHP body - only decode HTML entities *)
let text = Html_entities.decode @@ unesc_xhp (full_text node) in
p, String (p, text)
| Token _ ->
let p = pPos node env in
p, String (p, escaper (full_text node))
@@ -331,13 +331,3 @@ let decode_entity s =
(* Rewrites every substring of [s] matched by [entity_regex] with the result
   of calling [decode_entity] on the matched text. *)
let decode s =
  let substitute matched = decode_entity (Str.matched_string matched) in
  Str.global_substitute entity_regex substitute s
(* Applies [decode] to the string literals of an expression.

   [Ast.String] nodes get their text decoded. [Ast.Xml] nodes have their
   attributes (through [Ast_utils.map_xhp_attr]) and their children
   processed recursively. Every other node is returned unchanged. *)
let rec decode_expr (pos, expr_) = (pos, decode_expr_ expr_)

and decode_expr_ = function
  | Ast.String (pos, text) -> Ast.String (pos, decode text)
  | Ast.Xml (id, attrs, children) ->
    let decode_attr = Ast_utils.map_xhp_attr (fun x -> x) decode_expr in
    let decoded_attrs = List.map decode_attr attrs in
    let decoded_children = List.map decode_expr children in
    Ast.Xml (id, decoded_attrs, decoded_children)
  | other -> other

0 comments on commit d3ab762

Please sign in to comment.