From 4c9a727fccfcb0d0a52371c5bd49b138978bb8f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonatan=20K=C5=82osko?= Date: Mon, 4 Nov 2024 20:08:51 +0800 Subject: [PATCH 1/4] Fix formatter adding extra escapes to remote call functions --- lib/elixir/lib/code/formatter.ex | 23 ++++++------ lib/elixir/lib/code/normalizer.ex | 6 +-- lib/elixir/lib/macro.ex | 37 ++++++++++++++----- lib/elixir/src/elixir_tokenizer.erl | 6 ++- .../test/elixir/code_formatter/calls_test.exs | 1 + .../elixir/code_formatter/operators_test.exs | 1 + 6 files changed, 49 insertions(+), 25 deletions(-) diff --git a/lib/elixir/lib/code/formatter.ex b/lib/elixir/lib/code/formatter.ex index 6322b8530bc..0ac2d767f0f 100644 --- a/lib/elixir/lib/code/formatter.ex +++ b/lib/elixir/lib/code/formatter.ex @@ -582,15 +582,9 @@ defmodule Code.Formatter do {left, state} = case left_arg do {:__block__, _, [atom]} when is_atom(atom) -> - iodata = - if Macro.classify_atom(atom) in [:identifier, :unquoted] do - [Atom.to_string(atom), ?:] - else - [?", atom |> Atom.to_string() |> String.replace("\"", "\\\""), ?", ?:] - end - - {iodata - |> IO.iodata_to_binary() + formatted = Macro.inspect_atom(:key, atom, escape: &escape_atom/2) + + {formatted |> string() |> color_doc(:atom, state.inspect_opts), state} @@ -1010,7 +1004,7 @@ defmodule Code.Formatter do ) when is_atom(fun) and is_integer(arity) do {target_doc, state} = remote_target_to_algebra(target, state) - fun = Macro.inspect_atom(:remote_call, fun) + fun = Macro.inspect_atom(:remote_call, fun, escape: &escape_atom/2) {target_doc |> nest(1) |> concat(string(".#{fun}/#{arity}")), state} end @@ -1057,7 +1051,9 @@ defmodule Code.Formatter do {target_doc, state} = remote_target_to_algebra(target, state) fun_doc = - Macro.inspect_atom(:remote_call, fun) |> string() |> color_doc(:call, state.inspect_opts) + Macro.inspect_atom(:remote_call, fun, escape: &escape_atom/2) + |> string() + |> color_doc(:call, state.inspect_opts) remote_doc = target_doc |> concat(".") |> concat(fun_doc) @@ -2441,6 +2437,11 @@ defmodule Code.Formatter do meta[:closing][:line] || @min_line end + defp escape_atom(string, char) do + char = List.to_string([char]) + String.replace(string, char, "\\#{char}") + end + ## Algebra helpers # Relying on the inner document is brittle and error prone. diff --git a/lib/elixir/lib/code/normalizer.ex b/lib/elixir/lib/code/normalizer.ex index 32189dfc972..b77e056de6b 100644 --- a/lib/elixir/lib/code/normalizer.ex +++ b/lib/elixir/lib/code/normalizer.ex @@ -126,13 +126,13 @@ defmodule Code.Normalizer do {:., meta, [Access, :get]} end - # Only normalize the left side of the dot operator # The right hand side is an atom in the AST but it's not an atom literal, so - # it should not be wrapped - defp do_normalize({:., meta, [left, right]}, state) do + # it should not be wrapped. However, it should be escaped if applicable. + defp do_normalize({:., meta, [left, right]}, state) when is_atom(right) do meta = patch_meta_line(meta, state.parent_meta) left = do_normalize(left, %{state | parent_meta: meta}) + right = maybe_escape_literal(right, state) {:., meta, [left, right]} end diff --git a/lib/elixir/lib/macro.ex b/lib/elixir/lib/macro.ex index 6a1923eaf03..dfe94d823a6 100644 --- a/lib/elixir/lib/macro.ex +++ b/lib/elixir/lib/macro.ex @@ -2328,6 +2328,15 @@ defmodule Macro do as a key (`:key`), or as the function name of a remote call (`:remote_call`). + ## Options + + * `:escape` - a two-arity function used to escape a quoted + atom content, if necessary. The function receives the atom + content as string and a quote delimiter character, which + should always be escaped. By default the content is escaped + such that the inspected sequence would be parsed as the + given atom. + ## Examples ### As a literal @@ -2376,14 +2385,19 @@ defmodule Macro do """ @doc since: "1.14.0" - @spec inspect_atom(:literal | :key | :remote_call, atom) :: binary - def inspect_atom(source_format, atom) + @spec inspect_atom(:literal | :key | :remote_call, atom, keyword) :: binary + def inspect_atom(source_format, atom, opts \\ []) do + opts = Keyword.validate!(opts, [:escape]) - def inspect_atom(:literal, atom) when is_nil(atom) or is_boolean(atom) do + escape = Keyword.get(opts, :escape, &inspect_atom_escape/2) + do_inspect_atom(source_format, atom, escape) + end + + def do_inspect_atom(:literal, atom, _escape) when is_nil(atom) or is_boolean(atom) do Atom.to_string(atom) end - def inspect_atom(:literal, atom) when is_atom(atom) do + def do_inspect_atom(:literal, atom, escape) when is_atom(atom) do binary = Atom.to_string(atom) case classify_atom(atom) do @@ -2395,7 +2409,7 @@ defmodule Macro do end :quoted -> - {escaped, _} = Code.Identifier.escape(binary, ?") + escaped = escape.(binary, ?") IO.iodata_to_binary([?:, ?", escaped, ?"]) _ -> @@ -2403,7 +2417,7 @@ defmodule Macro do end end - def inspect_atom(:key, atom) when is_atom(atom) do + def do_inspect_atom(:key, atom, escape) when is_atom(atom) do binary = Atom.to_string(atom) case classify_atom(atom) do @@ -2411,7 +2425,7 @@ defmodule Macro do IO.iodata_to_binary([?", binary, ?", ?:]) :quoted -> - {escaped, _} = Code.Identifier.escape(binary, ?") + escaped = escape.(binary, ?") IO.iodata_to_binary([?", escaped, ?", ?:]) _ -> @@ -2419,7 +2433,7 @@ defmodule Macro do end end - def inspect_atom(:remote_call, atom) when is_atom(atom) do + def do_inspect_atom(:remote_call, atom, escape) when is_atom(atom) do binary = Atom.to_string(atom) case inner_classify(atom) do @@ -2431,13 +2445,18 @@ defmodule Macro do if type in [:not_callable, :alias] do binary else - elem(Code.Identifier.escape(binary, ?"), 0) + escape.(binary, ?") end IO.iodata_to_binary([?", escaped, ?"]) end end + defp inspect_atom_escape(string, char) do + {escaped, _} = Code.Identifier.escape(string, char) + escaped + end + # Classifies the given atom into one of the following categories: # # * `:alias` - a valid Elixir alias, like `Foo`, `Foo.Bar` and so on diff --git a/lib/elixir/src/elixir_tokenizer.erl b/lib/elixir/src/elixir_tokenizer.erl index 4a81819d044..0210129ab9f 100644 --- a/lib/elixir/src/elixir_tokenizer.erl +++ b/lib/elixir/src/elixir_tokenizer.erl @@ -914,9 +914,11 @@ handle_dot([$., H | T] = Original, Line, Column, DotInfo, Scope, Tokens) when ?i InterScope end, - case unsafe_to_atom(Part, Line, Column, NewScope) of + {ok, [UnescapedPart]} = unescape_tokens([Part], Line, Column, NewScope), + + case unsafe_to_atom(UnescapedPart, Line, Column, NewScope) of {ok, Atom} -> - Token = check_call_identifier(Line, Column, Part, Atom, Rest), + Token = check_call_identifier(Line, Column, UnescapedPart, Atom, Rest), TokensSoFar = add_token_with_eol({'.', DotInfo}, Tokens), tokenize(Rest, NewLine, NewColumn, NewScope, [Token | TokensSoFar]); diff --git a/lib/elixir/test/elixir/code_formatter/calls_test.exs b/lib/elixir/test/elixir/code_formatter/calls_test.exs index 6dc9925efe9..4df2138a9a2 100644 --- a/lib/elixir/test/elixir/code_formatter/calls_test.exs +++ b/lib/elixir/test/elixir/code_formatter/calls_test.exs @@ -595,6 +595,7 @@ defmodule Code.Formatter.CallsTest do assert_same ~S[Kernel.+(1, 2)] assert_same ~S[:erlang.+(1, 2)] assert_same ~S[foo."bar baz"(1, 2)] + assert_same ~S[foo."bar\nbaz"(1, 2)] end test "splits on arguments and dot on line limit" do diff --git a/lib/elixir/test/elixir/code_formatter/operators_test.exs b/lib/elixir/test/elixir/code_formatter/operators_test.exs index ec613a7ff2e..255553d38a6 100644 --- a/lib/elixir/test/elixir/code_formatter/operators_test.exs +++ b/lib/elixir/test/elixir/code_formatter/operators_test.exs @@ -948,6 +948,7 @@ defmodule Code.Formatter.OperatorsTest do assert_format "&(Mod.foo/1)", "&Mod.foo/1" assert_format "&(Mod.++/1)", "&Mod.++/1" assert_format ~s[&(Mod."foo bar"/1)], ~s[&Mod."foo bar"/1] + assert_format ~S[&(Mod."foo\nbar"/1)], ~S[&Mod."foo\nbar"/1] # Invalid assert_format "& Mod.foo/bar", "&(Mod.foo() / bar)" From e14c2926231e3277d9cf50eadda97f996687816d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonatan=20K=C5=82osko?= Date: Mon, 4 Nov 2024 13:55:22 +0100 Subject: [PATCH 2/4] Update lib/elixir/src/elixir_tokenizer.erl --- lib/elixir/src/elixir_tokenizer.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/elixir/src/elixir_tokenizer.erl b/lib/elixir/src/elixir_tokenizer.erl index 0210129ab9f..45ee82c244e 100644 --- a/lib/elixir/src/elixir_tokenizer.erl +++ b/lib/elixir/src/elixir_tokenizer.erl @@ -918,7 +918,7 @@ handle_dot([$., H | T] = Original, Line, Column, DotInfo, Scope, Tokens) when ?i case unsafe_to_atom(UnescapedPart, Line, Column, NewScope) of {ok, Atom} -> - Token = check_call_identifier(Line, Column, UnescapedPart, Atom, Rest), + Token = check_call_identifier(Line, Column, Part, Atom, Rest), TokensSoFar = add_token_with_eol({'.', DotInfo}, Tokens), tokenize(Rest, NewLine, NewColumn, NewScope, [Token | TokensSoFar]); From 8d25c68bdf56fb60db34b2707cc81f6a47eefa7b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonatan=20K=C5=82osko?= Date: Mon, 4 Nov 2024 14:15:05 +0100 Subject: [PATCH 3/4] Update lib/elixir/lib/code/formatter.ex MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: José Valim --- lib/elixir/lib/code/formatter.ex | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/elixir/lib/code/formatter.ex b/lib/elixir/lib/code/formatter.ex index 0ac2d767f0f..ac40b4a0065 100644 --- a/lib/elixir/lib/code/formatter.ex +++ b/lib/elixir/lib/code/formatter.ex @@ -2438,8 +2438,7 @@ defmodule Code.Formatter do end defp escape_atom(string, char) do - char = List.to_string([char]) - String.replace(string, char, "\\#{char}") + String.replace(string, <>, <>) end ## Algebra helpers From ccbc8cee05d428c4ae6d621aee26d78aa3d77458 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonatan=20K=C5=82osko?= Date: Mon, 4 Nov 2024 21:19:13 +0800 Subject: [PATCH 4/4] Up --- lib/elixir/lib/macro.ex | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/lib/elixir/lib/macro.ex b/lib/elixir/lib/macro.ex index dfe94d823a6..312c697c66b 100644 --- a/lib/elixir/lib/macro.ex +++ b/lib/elixir/lib/macro.ex @@ -2386,18 +2386,13 @@ defmodule Macro do """ @doc since: "1.14.0" @spec inspect_atom(:literal | :key | :remote_call, atom, keyword) :: binary - def inspect_atom(source_format, atom, opts \\ []) do - opts = Keyword.validate!(opts, [:escape]) + def inspect_atom(source_format, atom, opts \\ []) - escape = Keyword.get(opts, :escape, &inspect_atom_escape/2) - do_inspect_atom(source_format, atom, escape) - end - - def do_inspect_atom(:literal, atom, _escape) when is_nil(atom) or is_boolean(atom) do + def inspect_atom(:literal, atom, _opts) when is_nil(atom) or is_boolean(atom) do Atom.to_string(atom) end - def do_inspect_atom(:literal, atom, escape) when is_atom(atom) do + def inspect_atom(:literal, atom, opts) when is_atom(atom) do binary = Atom.to_string(atom) case classify_atom(atom) do @@ -2409,7 +2404,7 @@ defmodule Macro do end :quoted -> - escaped = escape.(binary, ?") + escaped = inspect_atom_escape(opts, binary, ?") IO.iodata_to_binary([?:, ?", escaped, ?"]) _ -> @@ -2417,7 +2412,7 @@ defmodule Macro do end end - def do_inspect_atom(:key, atom, escape) when is_atom(atom) do + def inspect_atom(:key, atom, opts) when is_atom(atom) do binary = Atom.to_string(atom) case classify_atom(atom) do @@ -2425,7 +2420,7 @@ defmodule Macro do IO.iodata_to_binary([?", binary, ?", ?:]) :quoted -> - escaped = escape.(binary, ?") + escaped = inspect_atom_escape(opts, binary, ?") IO.iodata_to_binary([?", escaped, ?", ?:]) _ -> @@ -2433,7 +2428,7 @@ defmodule Macro do end end - def do_inspect_atom(:remote_call, atom, escape) when is_atom(atom) do + def inspect_atom(:remote_call, atom, opts) when is_atom(atom) do binary = Atom.to_string(atom) case inner_classify(atom) do @@ -2445,16 +2440,20 @@ defmodule Macro do if type in [:not_callable, :alias] do binary else - escape.(binary, ?") + inspect_atom_escape(opts, binary, ?") end IO.iodata_to_binary([?", escaped, ?"]) end end - defp inspect_atom_escape(string, char) do - {escaped, _} = Code.Identifier.escape(string, char) - escaped + defp inspect_atom_escape(opts, string, char) do + if escape = opts[:escape] do + escape.(string, char) + else + {escaped, _} = Code.Identifier.escape(string, char) + escaped + end end # Classifies the given atom into one of the following categories: