From 34dade091617e4fec03a8988d1b6884958e2de26 Mon Sep 17 00:00:00 2001 From: vanstee Date: Sat, 24 Aug 2013 21:04:17 -0400 Subject: [PATCH 1/3] Revert "Merge pull request #1645 from vanstee/string-split-repeats" This reverts commit fb30101632f8e1ee94bb770ff858791aa6cba6a3, reversing changes made to c348eb7cc58459c3ff8f117c0e7a96384ad7734b. --- lib/elixir/lib/kernel.ex | 12 ++++++------ lib/elixir/lib/string.ex | 4 ++-- lib/elixir/priv/unicode.ex | 12 ++++++++++-- lib/elixir/test/elixir/string_test.exs | 8 ++++---- lib/iex/test/iex/helpers_test.exs | 2 +- 5 files changed, 23 insertions(+), 15 deletions(-) diff --git a/lib/elixir/lib/kernel.ex b/lib/elixir/lib/kernel.ex index e64767ea27d..bc17ab360b6 100644 --- a/lib/elixir/lib/kernel.ex +++ b/lib/elixir/lib/kernel.ex @@ -3621,15 +3621,15 @@ defmodule Kernel do case is_binary(string) do true -> case mod do - ?b -> lc p inlist String.split(string), p != "", do: p - ?a -> lc p inlist String.split(string), p != "", do: binary_to_atom(p) - ?c -> lc p inlist String.split(string), p != "", do: String.to_char_list!(p) + ?b -> String.split(string) + ?a -> lc p inlist String.split(string), do: binary_to_atom(p) + ?c -> lc p inlist String.split(string), do: String.to_char_list!(p) end false -> case mod do - ?b -> quote do: lc(p inlist String.split(unquote(string)), p != "", do: p) - ?a -> quote do: lc(p inlist String.split(unquote(string)), p != "", do: binary_to_atom(p)) - ?c -> quote do: lc(p inlist String.split(unquote(string)), p != "", do: String.to_char_list!(p)) + ?b -> quote do: String.split(unquote(string)) + ?a -> quote do: lc(p inlist String.split(unquote(string)), do: binary_to_atom(p)) + ?c -> quote do: lc(p inlist String.split(unquote(string)), do: String.to_char_list!(p)) end end end diff --git a/lib/elixir/lib/string.ex b/lib/elixir/lib/string.ex index 30b5547264d..e07ed7e3118 100644 --- a/lib/elixir/lib/string.ex +++ b/lib/elixir/lib/string.ex @@ -140,7 +140,7 @@ defmodule String do @doc """ Splits a string on substrings at each Unicode whitespace - occurrence. + occurrence with leading and trailing whitespace ignored. ## Examples @@ -149,7 +149,7 @@ defmodule String do iex> String.split("foo" <> <<194, 133>> <> "bar") ["foo", "bar"] iex> String.split(" foo bar ") - ["", "foo", "bar", ""] + ["foo", "bar"] """ @spec split(t) :: [t] diff --git a/lib/elixir/priv/unicode.ex b/lib/elixir/priv/unicode.ex index fc2ef35fc2a..f6849e63330 100644 --- a/lib/elixir/priv/unicode.ex +++ b/lib/elixir/priv/unicode.ex @@ -145,7 +145,11 @@ defmodule String.Unicode do lc codepoint inlist whitespace do defp do_split(unquote(codepoint) <> rest, buffer, acc) do - do_split(rest, "", [buffer | acc]) + if buffer != "" do + do_split(rest, "", [buffer | acc]) + else + do_split(rest, buffer, acc) + end end end @@ -154,7 +158,11 @@ defmodule String.Unicode do end defp do_split(<<>>, buffer, acc) do - [buffer | acc] + if buffer != "" do + [buffer | acc] + else + acc + end end # Graphemes diff --git a/lib/elixir/test/elixir/string_test.exs b/lib/elixir/test/elixir/string_test.exs index b6e0c3daec4..f642ae61d8a 100644 --- a/lib/elixir/test/elixir/string_test.exs +++ b/lib/elixir/test/elixir/string_test.exs @@ -18,10 +18,10 @@ defmodule StringTest do test :split do assert String.split("") == [""] assert String.split("foo bar") == ["foo", "bar"] - assert String.split(" foo bar") == ["", "foo", "bar"] - assert String.split("foo bar ") == ["foo", "bar", ""] - assert String.split(" foo bar ") == ["", "foo", "bar", ""] - assert String.split("foo\t\n\v\f\r\sbar\n") == ["foo", "", "", "", "", "", "bar", ""] + assert String.split(" foo bar") == ["foo", "bar"] + assert String.split("foo bar ") == ["foo", "bar"] + assert String.split(" foo bar ") == ["foo", "bar"] + assert String.split("foo\t\n\v\f\r\sbar\n") == ["foo", "bar"] assert String.split("foo" <> <<31>> <> "bar") == ["foo", "bar"] assert String.split("foo" <> <<194, 133>> <> "bar") == ["foo", "bar"] diff --git a/lib/iex/test/iex/helpers_test.exs b/lib/iex/test/iex/helpers_test.exs index 6c4659929fa..c86d451668a 100644 --- a/lib/iex/test/iex/helpers_test.exs +++ b/lib/iex/test/iex/helpers_test.exs @@ -133,7 +133,7 @@ defmodule IEx.HelpersTest do assert ["ebin", "lib", "mix.exs", "test"] = capture_io(fn -> ls end) |> String.split - |> Enum.filter(&(&1 != "")) + |> Enum.map(String.strip(&1)) |> Enum.sort assert capture_io(fn -> ls "~" end) == capture_io(fn -> ls System.user_home end) end From 64cdc768320f03d33d11dc1acd8a44c7810a0d34 Mon Sep 17 00:00:00 2001 From: vanstee Date: Sun, 25 Aug 2013 13:36:59 -0400 Subject: [PATCH 2/3] Accept the `trim` option when splitting strings By default `trim` will be set to true. Because `:binary.strip` only removes empty strings from the end of the result, we have to apply a filter to remove empty strings from the beginning before returning. --- lib/elixir/lib/string.ex | 18 +++++++++++++++--- lib/elixir/test/elixir/string_test.exs | 4 ++++ 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/lib/elixir/lib/string.ex b/lib/elixir/lib/string.ex index e07ed7e3118..b14700678bb 100644 --- a/lib/elixir/lib/string.ex +++ b/lib/elixir/lib/string.ex @@ -163,12 +163,17 @@ defmodule String do The string is split into as many parts as possible by default, unless the `global` option is set to `false`. + Empty strings are removed from the result, unless the + `trim` option is set to `false`. + ## Examples iex> String.split("a,b,c", ",") ["a", "b", "c"] iex> String.split("a,b,c", ",", global: false) ["a", "b,c"] + iex> String.split(" a b c ", " ", trim: false) + ["", "a", "b", "c", ""] iex> String.split("1,2 3,4", [" ", ","]) ["1", "2", "3", "4"] @@ -188,12 +193,19 @@ defmodule String do def split("", _pattern, _options), do: [""] def split(binary, pattern, options) when is_regex(pattern) do - Regex.split(pattern, binary, global: options[:global]) + Regex.split(pattern, binary, options) end def split(binary, pattern, options) do - opts = if options[:global] != false, do: [:global], else: [] - :binary.split(binary, pattern, opts) + defaults = [global: true, trim: true] + options = Keyword.merge(defaults, options) + + option_keys = Enum.filter_map(options, &elem(&1, 1), &elem(&1, 0)) + splits = :binary.split(binary, pattern, option_keys) + + if options[:trim], do: splits = Enum.filter(splits, &(&1 != "")) + + splits end @doc """ diff --git a/lib/elixir/test/elixir/string_test.exs b/lib/elixir/test/elixir/string_test.exs index f642ae61d8a..d3b8c49bfc4 100644 --- a/lib/elixir/test/elixir/string_test.exs +++ b/lib/elixir/test/elixir/string_test.exs @@ -29,9 +29,13 @@ defmodule StringTest do assert String.split("a,b,c", ",") == ["a", "b", "c"] assert String.split("a,b", ".") == ["a,b"] assert String.split("1,2 3,4", [" ", ","]) == ["1", "2", "3", "4"] + assert String.split(" a b c ", " ") == ["a", "b", "c"] assert String.split("a,b,c", ",", global: false) == ["a", "b,c"] assert String.split("1,2 3,4", [" ", ","], global: false) == ["1", "2 3,4"] + + assert String.split(" a b c ", " ", trim: false) == ["", "a", "b", "c", ""] + assert String.split(" a b c ", " ", trim: false, global: false) == ["", "a b c "] end test :split_with_regex do From e304ea9543b1bc30b8f4e800b135f60819931b68 Mon Sep 17 00:00:00 2001 From: vanstee Date: Sun, 25 Aug 2013 13:58:42 -0400 Subject: [PATCH 3/3] Accept the `trim` option for `Regex.split/3` By default `trim` will be set to true. However, since the `global` option was implemented with the `parts` option (`{ :parts, :infinity }`), we have to apply a filter to the final result to support `trim`. --- lib/elixir/lib/regex.ex | 19 ++++++++++--------- lib/elixir/test/elixir/regex_test.exs | 7 ++++--- lib/elixir/test/elixir/string_test.exs | 2 ++ lib/ex_unit/lib/ex_unit/doc_test.ex | 2 +- 4 files changed, 17 insertions(+), 13 deletions(-) diff --git a/lib/elixir/lib/regex.ex b/lib/elixir/lib/regex.ex index 4baa2c9c539..5bfb87a75f6 100644 --- a/lib/elixir/lib/regex.ex +++ b/lib/elixir/lib/regex.ex @@ -264,16 +264,17 @@ defmodule Regex do def split(regex, string, options // []) def split(regex(re_pattern: compiled), string, options) do - parts = - cond do - Keyword.get(options, :global) == false -> 2 - p = Keyword.get(options, :parts) -> p - true -> :infinity - end + defaults = [global: true, trim: true, parts: :infinity, return: return_for(string)] + options = Keyword.merge(defaults, options) - return = Keyword.get(options, :return, return_for(string)) - opts = [return: return, parts: parts] - :re.split(string, compiled, opts) + unless options[:global], do: options = Keyword.put(options, :parts, 2) + + valid_options = Dict.take(options, [:parts, :return]) + splits = :re.split(string, compiled, valid_options) + + if options[:trim], do: splits = Enum.filter(splits, &(&1 != "")) + + splits end @doc %B""" diff --git a/lib/elixir/test/elixir/regex_test.exs b/lib/elixir/test/elixir/regex_test.exs index 3d948d5537b..94426bfefd2 100644 --- a/lib/elixir/test/elixir/regex_test.exs +++ b/lib/elixir/test/elixir/regex_test.exs @@ -113,13 +113,14 @@ defmodule Regex.BinaryTest do end test :split do - assert Regex.split(%r",", "") == [""] + assert Regex.split(%r",", "") == [] assert Regex.split(%r" ", "foo bar baz") == ["foo", "bar", "baz"] assert Regex.split(%r" ", "foo bar baz", parts: 2) == ["foo", "bar baz"] assert Regex.split(%r"\s", "foobar") == ["foobar"] assert Regex.split(%r" ", "foo bar baz") == ["foo", "bar", "baz"] - assert Regex.split(%r"=", "key=") == ["key", ""] - assert Regex.split(%r"=", "=value") == ["", "value"] + assert Regex.split(%r" ", " foo bar baz ", trim: false) == ["", "foo", "bar", "baz", ""] + assert Regex.split(%r"=", "key=") == ["key"] + assert Regex.split(%r"=", "=value") == ["value"] end test :replace do diff --git a/lib/elixir/test/elixir/string_test.exs b/lib/elixir/test/elixir/string_test.exs index d3b8c49bfc4..ba3f793f6eb 100644 --- a/lib/elixir/test/elixir/string_test.exs +++ b/lib/elixir/test/elixir/string_test.exs @@ -43,6 +43,8 @@ defmodule StringTest do assert String.split("a,b", %r{,}) == ["a", "b"] assert String.split("a,b,c", %r{,}) == ["a", "b", "c"] assert String.split("a,b,c", %r{,}, global: false) == ["a", "b,c"] + assert String.split("a,b.c ", %r{\W}) == ["a", "b", "c"] + assert String.split("a,b.c ", %r{\W}, trim: false) == ["a", "b", "c", ""] assert String.split("a,b", %r{\.}) == ["a,b"] end diff --git a/lib/ex_unit/lib/ex_unit/doc_test.ex b/lib/ex_unit/lib/ex_unit/doc_test.ex index 06c68263518..4f2602210da 100644 --- a/lib/ex_unit/lib/ex_unit/doc_test.ex +++ b/lib/ex_unit/lib/ex_unit/doc_test.ex @@ -331,7 +331,7 @@ defmodule ExUnit.DocTest do end defp extract_tests(line, doc) do - lines = String.split(doc, %r/\n/) |> adjust_indent + lines = String.split(doc, %r/\n/, trim: false) |> adjust_indent extract_tests(lines, line, "", "", [], true) end