Skip to content

Make regexes fall back to binary matching on incompatible runtime version #9040

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
May 21, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 41 additions & 19 deletions lib/elixir/lib/regex.ex
Original file line number Diff line number Diff line change
Expand Up @@ -128,17 +128,18 @@ defmodule Regex do
## Precompilation

Regular expressions built with sigil are precompiled and stored in `.beam`
files. Precompiled regexes are not guaranteed to be compatible between OSes
and OTP releases. This is rarely a problem, as most Elixir code shared
during development is compiled on the target (such as dependencies, archives,
and escripts) and, when running in production, the code must either be
compiled on the target (via `mix compile` or similar) or released on the
files. Precompiled regexes will be checked at runtime and may run slower
when used across different operating systems and OTP releases. This is rarely a problem, as most Elixir code
shared during development is compiled on the target (such as dependencies,
archives, and escripts) and, when running in production, the code must either
be compiled on the target (via `mix compile` or similar) or released on the
host (via `mix releases` or similar) with a matching OTP, OS and architecture
as the target.

However, if you find yourself in a scenario where cross-compilation is
necessary, you can manually invoke `Regex.recompile/1` or `Regex.recompile!/1`
to perform a runtime version check and recompile the regex if necessary.
If you know you are running on a different system than the current one and
you are doing multiple matches with the regex, you can manually invoke
`Regex.recompile/1` or `Regex.recompile!/1` to perform a runtime version
check and recompile the regex if necessary.
"""

defstruct re_pattern: nil, source: "", opts: "", re_version: ""
Expand Down Expand Up @@ -264,8 +265,8 @@ defmodule Regex do

"""
@spec match?(t, String.t()) :: boolean
def match?(%Regex{re_pattern: compiled}, string) when is_binary(string) do
:re.run(string, compiled, [{:capture, :none}]) == :match
def match?(%Regex{} = regex, string) when is_binary(string) do
  # No captures are requested, so the run result is compared against the
  # bare `:match` atom; anything else means "did not match".
  case safe_run(regex, string, capture: :none) do
    :match -> true
    _other -> false
  end
end

@doc """
Expand Down Expand Up @@ -312,11 +313,11 @@ defmodule Regex do
@spec run(t, binary, [term]) :: nil | [binary] | [{integer, integer}]
def run(regex, string, options \\ [])

def run(%Regex{re_pattern: compiled}, string, options) when is_binary(string) do
def run(%Regex{} = regex, string, options) when is_binary(string) do
return = Keyword.get(options, :return, :binary)
captures = Keyword.get(options, :capture, :all)

case :re.run(string, compiled, [{:capture, captures, return}]) do
case safe_run(regex, string, [{:capture, captures, return}]) do
:nomatch -> nil
:match -> []
{:match, results} -> results
Expand Down Expand Up @@ -397,7 +398,17 @@ defmodule Regex do

"""
@spec names(t) :: [String.t()]
def names(%Regex{re_pattern: re_pattern}) do
def names(%Regex{re_pattern: compiled, re_version: version, source: source}) do
  # Only inspect the stored pattern when it was built for the `re` version
  # reported by `version/0`; otherwise compile the source again first.
  pattern =
    if version() === version do
      compiled
    else
      {:ok, fresh} = :re.compile(source)
      fresh
    end

  {:namelist, group_names} = :re.inspect(pattern, :namelist)
  group_names
end
Expand Down Expand Up @@ -437,18 +448,29 @@ defmodule Regex do
@spec scan(t, String.t(), [term]) :: [[String.t()]]
def scan(regex, string, options \\ [])

def scan(%Regex{re_pattern: compiled}, string, options) when is_binary(string) do
def scan(%Regex{} = regex, string, options) when is_binary(string) do
# Caller options: `:return` defaults to `:binary` and `:capture` to `:all`.
return = Keyword.get(options, :return, :binary)
captures = Keyword.get(options, :capture, :all)
# Rebinds `options` to the list handed to the `re` run call, adding `:global`.
options = [{:capture, captures, return}, :global]

case :re.run(string, compiled, options) do
case safe_run(regex, string, options) do
# A bare `:match` (no captured data) and `:nomatch` both produce an empty list.
:match -> []
:nomatch -> []
{:match, results} -> results
end
end

# Runs `:re.run/3` for `regex` against `string` with the given run `options`.
#
# The precompiled pattern is used only when the `re_version` stored in the
# struct matches the value returned by `version/0`. Otherwise the regex source
# is handed to `:re.run/3`, which compiles it on the running system.
#
# In that fallback the modifiers the regex was built with (the `opts` string)
# must be passed along too; otherwise a regex such as `~r/foo/i` would lose
# its modifiers and match differently on a mismatching system.
#
# NOTE(review): relies on this module's `translate_options/2` helper to turn
# the modifier string into `:re` options prepended to `options` - confirm its
# signature against the rest of the file.
defp safe_run(
       %Regex{re_pattern: compiled, source: source, re_version: version, opts: compile_opts},
       string,
       options
     ) do
  case version() do
    ^version -> :re.run(string, compiled, options)
    _ -> :re.run(string, source, translate_options(compile_opts, options))
  end
end

@doc """
Splits the given target based on the given pattern and in the given number of
parts.
Expand Down Expand Up @@ -508,11 +530,11 @@ defmodule Regex do
end
end

def split(%Regex{re_pattern: compiled}, string, opts)
def split(%Regex{} = regex, string, opts)
when is_binary(string) and is_list(opts) do
on = Keyword.get(opts, :on, :first)

case :re.run(string, compiled, [:global, capture: on]) do
case safe_run(regex, string, [:global, capture: on]) do
{:match, matches} ->
index = parts_to_index(Keyword.get(opts, :parts, :infinity))
trim = Keyword.get(opts, :trim, false)
Expand Down Expand Up @@ -634,11 +656,11 @@ defmodule Regex do
do_replace(regex, string, {replacement, arity}, options)
end

defp do_replace(%Regex{re_pattern: compiled}, string, replacement, options) do
defp do_replace(%Regex{} = regex, string, replacement, options) do
opts = if Keyword.get(options, :global) != false, do: [:global], else: []
opts = [{:capture, :all, :index} | opts]

case :re.run(string, compiled, opts) do
case safe_run(regex, string, opts) do
:nomatch ->
string

Expand Down
2 changes: 1 addition & 1 deletion lib/elixir/src/elixir.app.src
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,5 @@
{registered, [elixir_config, elixir_code_server]},
{applications, [kernel,stdlib,compiler]},
{mod, {elixir,[]}},
{env, [{ansi_enabled, false}, {time_zone_database, 'Elixir.Calendar.UTCOnlyTimeZoneDatabase'}]}
{env, [{check_endianness, true}, {ansi_enabled, false}, {time_zone_database, 'Elixir.Calendar.UTCOnlyTimeZoneDatabase'}]}
]}.
6 changes: 5 additions & 1 deletion lib/elixir/src/elixir.erl
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,11 @@ start(_Type, _Args) ->
preload_common_modules(),
set_stdio_and_stderr_to_binary_and_maybe_utf8(),
check_file_encoding(Encoding),
check_endianness(),

case application:get_env(elixir, check_endianness, true) of
true -> check_endianness();
false -> ok
end,

Tokenizer = case code:ensure_loaded('Elixir.String.Tokenizer') of
{module, Mod} -> Mod;
Expand Down
45 changes: 45 additions & 0 deletions lib/elixir/test/elixir/regex_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,39 @@ Code.require_file("test_helper.exs", __DIR__)
defmodule RegexTest do
use ExUnit.Case, async: true

# Hand-written `%Regex{}` fixtures for the source "c(?<foo>d|e)". Each one
# carries an already-compiled `re_pattern` together with the `re_version`
# tuple it is tagged with, so the "compiled in different systems" tests can
# exercise regexes whose version may differ from the running system.
# NOTE(review): the attribute names suggest OTP 21.3 / OTP 19.3 builds on
# little- and big-endian hosts - confirm where the binaries were generated.

# Tagged with re_version {"8.42 2018-03-20", :little}.
@re_21_3_little %Regex{
re_pattern:
{:re_pattern, 1, 0, 0,
<<69, 82, 67, 80, 94, 0, 0, 0, 0, 0, 0, 0, 17, 0, 0, 0, 255, 255, 255, 255, 255, 255, 255,
255, 99, 0, 0, 0, 0, 0, 1, 0, 0, 0, 64, 0, 6, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 102, 111, 111, 0, 131, 0, 20, 29, 99, 133,
0, 7, 0, 1, 29, 100, 119, 0, 5, 29, 101, 120, 0, 12, 120, 0, 20, 0>>},
re_version: {"8.42 2018-03-20", :little},
source: "c(?<foo>d|e)"
}

# Tagged with re_version {"8.42 2018-03-20", :big}.
@re_21_3_big %Regex{
re_pattern:
{:re_pattern, 1, 0, 0,
<<80, 67, 82, 69, 0, 0, 0, 86, 0, 0, 0, 0, 0, 0, 0, 17, 255, 255, 255, 255, 255, 255, 255,
255, 0, 99, 0, 0, 0, 0, 0, 1, 0, 0, 0, 56, 0, 6, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 1, 102, 111, 111, 0, 131, 0, 20, 29, 99, 133, 0, 7, 0, 1, 29, 100, 119,
0, 5, 29, 101, 120, 0, 12, 120, 0, 20, 0>>},
re_version: {"8.42 2018-03-20", :big},
source: "c(?<foo>d|e)"
}

# Tagged with re_version {"8.33 2013-05-29", :little}.
@re_19_3_little %Regex{
re_pattern:
{:re_pattern, 1, 0, 0,
<<69, 82, 67, 80, 94, 0, 0, 0, 0, 0, 0, 0, 17, 0, 0, 0, 255, 255, 255, 255, 255, 255, 255,
255, 99, 0, 0, 0, 0, 0, 1, 0, 0, 0, 64, 0, 6, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 102, 111, 111, 0, 125, 0, 20, 29, 99, 127,
0, 7, 0, 1, 29, 100, 113, 0, 5, 29, 101, 114, 0, 12, 114, 0, 20, 0>>},
re_version: {"8.33 2013-05-29", :little},
source: "c(?<foo>d|e)"
}

doctest Regex

test "multiline" do
Expand Down Expand Up @@ -155,6 +188,12 @@ defmodule RegexTest do
assert Regex.run(~r"e", "abcd", return: :index) == nil
end

test "run/3 with regexes compiled in different systems" do
  # Every fixture shares the same source but carries its own `re_version`,
  # so each must yield the first named capture regardless of that tag.
  for regex <- [@re_21_3_little, @re_21_3_big, @re_19_3_little] do
    assert Regex.run(regex, "abcd abce", capture: :all_names) == ["d"]
  end
end

test "scan/2" do
assert Regex.scan(~r"c(d|e)", "abcd abce") == [["cd", "d"], ["ce", "e"]]
assert Regex.scan(~r"c(?:d|e)", "abcd abce") == [["cd"], ["ce"]]
Expand All @@ -168,6 +207,12 @@ defmodule RegexTest do
assert Regex.scan(~r/c(?<foo>d|e)/, "abcd abce", capture: :all_names) == [["d"], ["e"]]
end

test "scan/2 with regexes compiled in different systems" do
  # Every fixture shares the same source but carries its own `re_version`,
  # so each must yield both named captures regardless of that tag.
  for regex <- [@re_21_3_little, @re_21_3_big, @re_19_3_little] do
    assert Regex.scan(regex, "abcd abce", capture: :all_names) == [["d"], ["e"]]
  end
end

test "split/2,3" do
assert Regex.split(~r",", "") == [""]
assert Regex.split(~r",", "", trim: true) == []
Expand Down
3 changes: 2 additions & 1 deletion lib/mix/lib/mix/tasks/escript.build.ex
Original file line number Diff line number Diff line change
Expand Up @@ -367,9 +367,10 @@ defmodule Mix.Tasks.Escript.Build do

defp main_body_for(:elixir) do
quote do
load_config(@config)

case :application.ensure_all_started(:elixir) do
{:ok, _} ->
load_config(@config)
start_app(@app)
args = Enum.map(args, &List.to_string(&1))
Kernel.CLI.run(fn _ -> @module.main(args) end)
Expand Down