Permalink
Browse files

Merge branch 'master' into date-time-records

  • Loading branch information...
2 parents 453fc87 + 91b43f2 commit 56844858d2561a806410a7029e26094e4772330d @devinus committed Oct 26, 2012
View
@@ -1,5 +1,7 @@
* enhancements
- * [String] Support String.downcase and String.upcase according to Unicode 6.2.0
+ * [IEx] Only show documented functions and also show docs for default generated functions
+ * [String] Support `String.downcase` and `String.upcase` according to Unicode 6.2.0
+ * [String] Add support for graphemes in `String.length`, `String.at` and others
* bug fix
* [Kernel] Fix an issue where variables inside clauses remained unassigned
View
@@ -36,7 +36,7 @@ lib/elixir/src/elixir.app.src: src/elixir.app.src
erlang:
@ cd lib/elixir && $(REBAR) compile
-lib/elixir/ebin/Elixir-String-Unicode.beam: lib/elixir/priv/unicode.ex lib/elixir/priv/UnicodeData.txt
+lib/elixir/ebin/Elixir-String-Unicode.beam: lib/elixir/priv/unicode.ex lib/elixir/priv/UnicodeData.txt lib/elixir/priv/NamedSequences.txt
@ echo "==> unicode (compile)";
@ echo "This step can take up to a minute to compile in order to embed the Unicode database"
@ bin/elixirc --ignore-module-conflict lib/elixir/priv/unicode.ex -o lib/elixir/ebin;
@@ -14,7 +14,6 @@
super=false, %% when true, it means super was invoked
caller=false, %% when true, it means caller was invoked
name_args=false, %% when true, it means arguments should be named
- macro=[], %% a stack with macros nesting
module=nil, %% the current module
function=nil, %% the current function
recur=nil, %% the current loop function to be recurred
View
@@ -2872,7 +2872,7 @@ defmodule Kernel do
defdelegate other_reverse(list), to: :lists, as: :reverse
end
- My:lists.reverse([1,2,3])
+ MyList.reverse([1,2,3])
#=> [3,2,1]
MyList.other_reverse([1,2,3])
@@ -139,8 +139,8 @@ defmodule Kernel.SpecialForms do
import :all, List
- It can also be customized to import only functions or only
- macros:
+ It can also be customized to import only the functions or
+ only the macros:
import :functions, List
import :macros, List
@@ -448,11 +448,39 @@ defmodule Kernel.SpecialForms do
@doc """
This is the special form used to hold aliases information.
- At compilation time, it is usually compiled to an atom:
+ It is usually compiled to an atom:
quote do: Foo.Bar
{ :__aliases__, 0, [:Foo,:Bar] }
+ Elixir represents `Foo.Bar` as `__aliases__` so calls can be
+ unambiguously identified by the operator `:.`. For example:
+
+ quote do: Foo.bar
+ {{:.,0,[{:__aliases__,0,[:Foo]},:bar]},0,[]}
+
+ Whenever an expression iterator sees a `:.` as the tuple key,
+ it can be sure that it represents a call and the second element
+ of the arguments list is an atom.
+
+ On the other hand, aliases hold some properties:
+
+ 1) The head element of aliases can be any term;
+
+ 2) The tail elements of aliases are guaranteed to always be atoms;
+
+ 3) When the head element of aliases is the atom :Elixir, no expansion happens;
+
+ 4) When the head element of aliases is not an atom, it is expanded at runtime:
+
+ quote do: some_var.Foo
+ {:__aliases__,0,[{:some_var,0,:quoted},:Foo]}
+
+ Since `some_var` is not available at compilation time, the compiler
+ expands such an expression to:
+
+ Module.concat [some_var, Foo]
+
"""
defmacro __aliases__(args)
end
View
@@ -316,7 +316,7 @@ defmodule Module do
assert_not_compiled!(:defines?, module)
table = function_table_for(module)
case ETS.lookup(table, tuple) do
- [{ _, ^kind, _, _, _, _, _, _ }] -> true
+ [{ _, ^kind, _, _, _, _, _ }] -> true
_ -> false
end
end
@@ -335,7 +335,7 @@ defmodule Module do
def definitions_in(module) do
assert_not_compiled!(:definitions_in, module)
table = function_table_for(module)
- lc { tuple, _, _, _, _, _, _, _ } inlist ETS.tab2list(table), do: tuple
+ lc { tuple, _, _, _, _, _, _ } inlist ETS.tab2list(table), do: tuple
end
@doc """
@@ -354,7 +354,7 @@ defmodule Module do
def definitions_in(module, kind) do
assert_not_compiled!(:definitions_in, module)
table = function_table_for(module)
- lc { tuple, stored_kind, _, _, _, _, _, _ } inlist ETS.tab2list(table), stored_kind == kind, do: tuple
+ lc { tuple, stored_kind, _, _, _, _, _ } inlist ETS.tab2list(table), stored_kind == kind, do: tuple
end
@doc """
View
@@ -1,8 +1,14 @@
defmodule String do
@moduledoc """
A string in Elixir is a utf-8 binary. This module
- contains function to work with utf-8 data and its
- codepoints.
+ contains functions to work with utf-8 data, its
+ codepoints and graphemes.
+
+ Notice that graphemes are a superset of UTF-8 codepoints
+ which also contains named sequences as defined per
+ http://www.unicode.org/reports/tr34/. In short, graphemes
+ also contain multiple characters that are "perceived as
+ a single character" by readers.
For working with raw binaries, use Erlang's :binary
module.
@@ -90,7 +96,6 @@ defmodule String do
def printable?(<<>>), do: true
def printable?(_), do: false
-
@doc """
Divides a string into substrings based on a pattern,
returning a list of these substrings. The pattern can
@@ -285,18 +290,47 @@ defmodule String do
String.codepoints("ἅἪῼ") #=> ["ἅ","Ἢ","ῼ"]
"""
- def codepoints(string) do
- do_codepoints(codepoint(string))
- end
+ defdelegate codepoints(string), to: String.Unicode
- defp do_codepoints({char, rest}) do
- [char|do_codepoints(codepoint(rest))]
- end
+ @doc """
+ Returns the next codepoint in a String.
+
+ The result is a tuple with the codepoint and the
+ remainder of the string, or `:no_codepoint` in case
+ the string has reached its end.
+
+ ## Examples
+
+ String.next_codepoint("josé") #=> { "j", "osé" }
+
+ """
+ defdelegate next_codepoint(string), to: String.Unicode
+
+ @doc """
+ Returns the Unicode graphemes in the string.
+
+ ## Examples
+
+ String.graphemes("Ā̀stute") #=> ["Ā̀","s","t","u","t","e"]
- defp do_codepoints(:no_codepoint), do: []
+ """
+ defdelegate graphemes(string), to: String.Unicode
@doc """
- Returns the first codepoint from an utf8 string.
+ Returns the next grapheme in a String.
+
+ The result is a tuple with the grapheme and the
+ remainder of the string, or `:no_grapheme` in case
+ the string has reached its end.
+
+ ## Examples
+
+ String.next_grapheme("josé") #=> { "j", "osé" }
+
+ """
+ defdelegate next_grapheme(string), to: String.Unicode
+
+ @doc """
+ Returns the first grapheme from an utf8 string.
## Examples
@@ -305,14 +339,14 @@ defmodule String do
"""
def first(string) do
- case codepoint(string) do
+ case next_grapheme(string) do
{ char, _ } -> char
- :no_codepoint -> ""
+ :no_grapheme -> ""
end
end
@doc """
- Returns the last codepoint from an utf8 string.
+ Returns the last grapheme from an utf8 string.
## Examples
@@ -321,17 +355,17 @@ defmodule String do
"""
def last(string) do
- do_last(codepoint(string), "")
+ do_last(next_grapheme(string), "")
end
defp do_last({char, rest}, _) do
- do_last(codepoint(rest), char)
+ do_last(next_grapheme(rest), char)
end
- defp do_last(:no_codepoint, last_char), do: last_char
+ defp do_last(:no_grapheme, last_char), do: last_char
@doc """
- Returns the number of codepoint in an utf8 string.
+ Returns the number of unicode graphemes in an utf8 string.
## Examples
@@ -340,17 +374,17 @@ defmodule String do
"""
def length(string) do
- do_length(codepoint(string))
+ do_length(next_grapheme(string))
end
defp do_length({_, rest}) do
- 1 + do_length(codepoint(rest))
+ 1 + do_length(next_grapheme(rest))
end
- defp do_length(:no_codepoint), do: 0
+ defp do_length(:no_grapheme), do: 0
@doc """
- Returns the codepoint in the `position` of the given utf8 `string`.
+ Returns the grapheme at the given `position` of the utf8 `string`.
If `position` is greater than the `string` length, then it returns `nil`.
## Examples
@@ -359,51 +393,28 @@ defmodule String do
String.at("elixir", 1) #=> "l"
String.at("elixir", 10) #=> nil
String.at("elixir", -1) #=> "r"
- String.at("elixir", -10) #=> "nil"
+ String.at("elixir", -10) #=> nil
"""
def at(string, position) when position >= 0 do
- do_at(codepoint(string), position, 0)
+ do_at(next_grapheme(string), position, 0)
end
def at(string, position) when position < 0 do
- real_pos = do_length(codepoint(string)) - abs(position)
+ real_pos = do_length(next_grapheme(string)) - abs(position)
case real_pos >= 0 do
- true -> do_at(codepoint(string), real_pos, 0)
+ true -> do_at(next_grapheme(string), real_pos, 0)
- false -> ""
+ # A negative position reaching before the start of the string is out of
+ # range, so return nil like every other out-of-range lookup.
+ false -> nil
end
end
defp do_at({_ , rest}, desired_pos, current_pos) when desired_pos > current_pos do
- do_at(codepoint(rest), desired_pos, current_pos + 1)
+ do_at(next_grapheme(rest), desired_pos, current_pos + 1)
end
defp do_at({char, _}, desired_pos, current_pos) when desired_pos == current_pos do
char
end
- defp do_at(:no_codepoint, _, _), do: ""
-
- # Private implementation which returns the first codepoint
- # of any given utf8 string and the rest of it
- # If an empty string is given, :no_codepoint is returned.
- defp codepoint(<<194, char, rest :: binary>>)
- when char in 161..191,
- do: { <<194, char>>, rest }
-
- defp codepoint(<<first, char, rest :: binary>>)
- when first in 195..223 and char in 128..191,
- do: { <<first, char>>, rest }
-
- defp codepoint(<<first, second, char, rest :: binary>>)
- when first == 224 and second in 160..191 and char in 128..191,
- do: { <<first, second, char>>, rest }
-
- defp codepoint(<<first, second, char, rest :: binary>>)
- when first in 225..239 and second in 128..191 and char in 128..191,
- do: { <<first, second, char>>, rest }
-
- defp codepoint(<<other, rest :: binary>>), do: { <<other>>, rest }
-
- defp codepoint(<<>>), do: :no_codepoint
+ defp do_at(:no_grapheme, _, _), do: nil
end
Oops, something went wrong.

0 comments on commit 5684485

Please sign in to comment.