Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP

Loading…

Add String.Unicode.{lstrip,rstrip} #683

Merged
merged 1 commit into from

2 participants

@devinus

No description provided.

@devinus

Don't merge this yet.

@josevalim
Owner

Ok. In any case, could you please decouple the refactoring of the current code from the actual feature? It'd be better for reviewing and for blaming the code later.

@devinus

Okay, ready for peer review. @yrashk @josevalim @l4u

@josevalim
Owner

Ah, this looks great, thanks @devinus!

One thing that is important to distinguish is whether String.strip should strip all spaces or only whitespace. In Unicode, we have a couple of characters that are spaces but are not whitespace. Example:

http://www.fileformat.info/info/unicode/char/a0/index.htm

My initial reaction is that we should strip only whitespace, and therefore we could skip the category check entirely and rely only on bidi.

@devinus

@josevalim That's how the first patch was. I didn't need to change it. Let me fix that.

@josevalim josevalim merged commit fa9a78f into elixir-lang:master

1 check passed

Details default The Travis build passed
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Commits on Dec 2, 2012
  1. @devinus
This page is out of date. Refresh to see the latest.
View
36 lib/elixir/lib/string.ex
@@ -18,9 +18,6 @@ defmodule String do
@type codepoint :: t
@type grapheme :: t
- # Entries considered as whitespace
- @whitespace [?\t, ?\n, ?\v, ?\f, ?\r, ?\s]
-
@doc """
Checks if a string is printable considering it is encoded
as UTF-8. Returns true if so, false otherwise.
@@ -186,28 +183,7 @@ defmodule String do
"""
@spec rstrip(t) :: t
-
- def rstrip(""), do: ""
-
- def rstrip(string) do
- if :binary.last(string) in @whitespace do
- do_rstrip(string, "")
- else
- string
- end
- end
-
- defp do_rstrip(<<char, string :: binary>>, buffer) when char in @whitespace do
- do_rstrip(string, <<char, buffer :: binary>>)
- end
-
- defp do_rstrip(<<char, string :: binary>>, buffer) do
- <<buffer :: binary, char, do_rstrip(string, "") :: binary>>
- end
-
- defp do_rstrip(<<>>, _) do
- <<>>
- end
+ defdelegate rstrip(binary), to: String.Unicode
@doc """
Returns a string where trailing `char` have been removed.
@@ -253,15 +229,7 @@ defmodule String do
String.lstrip(" abc ") #=> "abc "
"""
- @spec lstrip(t) :: t
-
- def lstrip(<<char, rest :: binary>>) when char in @whitespace do
- lstrip(rest)
- end
-
- def lstrip(other) do
- other
- end
+ defdelegate lstrip(binary), to: String.Unicode
@doc """
Returns a string where leading `char` have been removed.
View
45 lib/elixir/priv/unicode.ex
@@ -12,17 +12,20 @@ defmodule String.Unicode do
data_path = File.expand_path("../UnicodeData.txt", __FILE__)
- codes = Enum.reduce File.iterator!(data_path), [], fn(line, acc) ->
+ { codes, whitespace } = Enum.reduce File.iterator!(data_path), { [], [] }, fn(line, { cacc, wacc }) ->
[ codepoint, _name, _category,
- _class, _bidi, _decomposition,
+ _class, bidi, _decomposition,
_numeric_1, _numeric_2, _numeric_3,
_bidi_mirror, _unicode_1, _iso,
upper, lower, _title ] = :binary.split(line, ";", [:global])
- if upper != "" or lower != "" do
- [{ to_binary.(codepoint), upper, lower }|acc]
- else
- acc
+ cond do
+ upper != "" or lower != "" ->
+ { [{ to_binary.(codepoint), upper, lower } | cacc], wacc }
+ bidi in ["B", "S", "WS"] ->
+ { cacc, [to_binary.(codepoint) | wacc] }
+ true ->
+ { cacc, wacc }
end
end
@@ -70,6 +73,36 @@ defmodule String.Unicode do
<< >>
end
+ # Strip
+
+ def lstrip(""), do: ""
+
+ lc char inlist whitespace do
+ args = quote do: [unquote(char) <> rest]
+ exprs = quote do: lstrip(rest)
+ def :lstrip, args, [], do: exprs
+ end
+
+ def lstrip(other), do: other
+
+ def rstrip(""), do: ""
+
+ def rstrip(string) do
+ do_rstrip(string, "")
+ end
+
+ lc char inlist whitespace do
+ args = quote do: [unquote(char) <> rest, buffer]
+ exprs = quote do: do_rstrip(rest, unquote(char) <> buffer)
+ defp :do_rstrip, args, [], do: exprs
+ end
+
+ defp do_rstrip(<< char, string :: binary >>, buffer) do
+ << buffer :: binary, char, do_rstrip(string, "") :: binary >>
+ end
+
+ defp do_rstrip(<<>>, _), do: <<>>
+
# Graphemes
lc codepoints inlist seqs do
View
4 lib/elixir/test/elixir/string_test.exs
@@ -47,6 +47,8 @@ defmodule StringTest do
assert String.rstrip(" abc a") == " abc a"
assert String.rstrip("a abc a\n\n") == "a abc a"
assert String.rstrip("a abc a\t\n\v\f\r\s") == "a abc a"
+ assert String.rstrip("a abc a " <> <<31>>) == "a abc a"
+ assert String.rstrip("a abc a" <> <<194,133>>) == "a abc a"
assert String.rstrip(" abc aa", ?a) == " abc "
assert String.rstrip(" abc __", ?_) == " abc "
end
@@ -57,6 +59,8 @@ defmodule StringTest do
assert String.lstrip("a abc a") == "a abc a"
assert String.lstrip("\n\na abc a") == "a abc a"
assert String.lstrip("\t\n\v\f\r\sa abc a") == "a abc a"
+ assert String.lstrip(<<31>> <> " a abc a") == "a abc a"
+ assert String.lstrip(<<194,133>> <> "a abc a") == "a abc a"
assert String.lstrip("__ abc _", ?_) == " abc _"
end
Something went wrong with that request. Please try again.