Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse code

Merge branch 'master' into date-time-records

  • Loading branch information...
commit 56844858d2561a806410a7029e26094e4772330d 2 parents 453fc87 + 91b43f2
Devin Torres authored
4 CHANGELOG.md
Source Rendered
... ... @@ -1,5 +1,7 @@
1 1 * enhancements
2   - * [String] Support String.downcase and String.upcase according to Unicode 6.2.0
  2 + * [IEx] Only show documented functions and also show docs for default generated functions
  3 + * [String] Support `String.downcase` and `String.upcase` according to Unicode 6.2.0
  4 + * [String] Add support for graphemes in `String.length`, `String.at` and others
3 5
4 6 * bug fix
5 7 * [Kernel] Fix an issue where variables inside clauses remained unassigned
2  Makefile
@@ -36,7 +36,7 @@ lib/elixir/src/elixir.app.src: src/elixir.app.src
36 36 erlang:
37 37 @ cd lib/elixir && $(REBAR) compile
38 38
39   -lib/elixir/ebin/Elixir-String-Unicode.beam: lib/elixir/priv/unicode.ex lib/elixir/priv/UnicodeData.txt
  39 +lib/elixir/ebin/Elixir-String-Unicode.beam: lib/elixir/priv/unicode.ex lib/elixir/priv/UnicodeData.txt lib/elixir/priv/NamedSequences.txt
40 40 @ echo "==> unicode (compile)";
41 41 @ echo "This step can take up to a minute to compile in order to embed the Unicode database"
42 42 @ bin/elixirc --ignore-module-conflict lib/elixir/priv/unicode.ex -o lib/elixir/ebin;
1  lib/elixir/include/elixir.hrl
@@ -14,7 +14,6 @@
14 14 super=false, %% when true, it means super was invoked
15 15 caller=false, %% when true, it means caller was invoked
16 16 name_args=false, %% when true, it means arguments should be named
17   - macro=[], %% a stack with macros nesting
18 17 module=nil, %% the current module
19 18 function=nil, %% the current function
20 19 recur=nil, %% the current loop function to be recurred
2  lib/elixir/lib/kernel.ex
@@ -2872,7 +2872,7 @@ defmodule Kernel do
2872 2872 defdelegate other_reverse(list), to: :lists, as: :reverse
2873 2873 end
2874 2874
2875   - My:lists.reverse([1,2,3])
  2875 + MyList.reverse([1,2,3])
2876 2876 #=> [3,2,1]
2877 2877
2878 2878 MyList.other_reverse([1,2,3])
34 lib/elixir/lib/kernel/special_forms.ex
@@ -139,8 +139,8 @@ defmodule Kernel.SpecialForms do
139 139
140 140 import :all, List
141 141
142   - It can also be customized to import only functions or only
143   - macros:
  142 + It can also be customized to import only all functions or
  143 + all macros:
144 144
145 145 import :functions, List
146 146 import :macros, List
@@ -448,11 +448,39 @@ defmodule Kernel.SpecialForms do
448 448
449 449 @doc """
450 450 This is the special form used to hold aliases information.
451   - At compilation time, it is usually compiled to an atom:
  451 + It is usually compiled to an atom:
452 452
453 453 quote do: Foo.Bar
454 454 { :__aliases__, 0, [:Foo,:Bar] }
455 455
  456 + Elixir represents `Foo.Bar` as `__aliases__` so calls can be
  457 + unambiguously identified by the operator `:.`. For example:
  458 +
  459 + quote do: Foo.bar
  460 + {{:.,0,[{:__aliases__,0,[:Foo]},:bar]},0,[]}
  461 +
  462 + Whenever an expression iterator sees a `:.` as the tuple key,
  463 + it can be sure that it represents a call and the second element
  464 + of the arguments list is an atom.
  465 +
  466 + On the other hand, aliases hold some properties:
  467 +
  468 + 1) The head element of aliases can be any term;
  469 +
  470 + 2) The tail elements of aliases are guaranteed to always be atoms;
  471 +
  472 + 3) When the head element of aliases is the atom :Elixir, no expansion happens;
  473 +
  474 + 4) When the head element of aliases is not an atom, it is expanded at runtime:
  475 +
  476 + quote do: some_var.Foo
  477 + {:__aliases__,0,[{:some_var,0,:quoted},:Foo]}
  478 +
  479 + Since `some_var` is not available at compilation time, the compiler
  480 + expands such expression to:
  481 +
  482 + Module.concat [some_var, Foo]
  483 +
456 484 """
457 485 defmacro __aliases__(args)
458 486 end
6 lib/elixir/lib/module.ex
@@ -316,7 +316,7 @@ defmodule Module do
316 316 assert_not_compiled!(:defines?, module)
317 317 table = function_table_for(module)
318 318 case ETS.lookup(table, tuple) do
319   - [{ _, ^kind, _, _, _, _, _, _ }] -> true
  319 + [{ _, ^kind, _, _, _, _, _ }] -> true
320 320 _ -> false
321 321 end
322 322 end
@@ -335,7 +335,7 @@ defmodule Module do
335 335 def definitions_in(module) do
336 336 assert_not_compiled!(:definitions_in, module)
337 337 table = function_table_for(module)
338   - lc { tuple, _, _, _, _, _, _, _ } inlist ETS.tab2list(table), do: tuple
  338 + lc { tuple, _, _, _, _, _, _ } inlist ETS.tab2list(table), do: tuple
339 339 end
340 340
341 341 @doc """
@@ -354,7 +354,7 @@ defmodule Module do
354 354 def definitions_in(module, kind) do
355 355 assert_not_compiled!(:definitions_in, module)
356 356 table = function_table_for(module)
357   - lc { tuple, stored_kind, _, _, _, _, _, _ } inlist ETS.tab2list(table), stored_kind == kind, do: tuple
  357 + lc { tuple, stored_kind, _, _, _, _, _ } inlist ETS.tab2list(table), stored_kind == kind, do: tuple
358 358 end
359 359
360 360 @doc """
113 lib/elixir/lib/string.ex
... ... @@ -1,8 +1,14 @@
1 1 defmodule String do
2 2 @moduledoc """
3 3 A string in Elixir is a utf-8 binary. This module
4   - contains function to work with utf-8 data and its
5   - codepoints.
  4 + contains functions to work with utf-8 data, its
  5 + codepoints and graphemes.
  6 +
  7 + Notice that graphemes are a superset of UTF-8 codepoints
  8 + which also contain named sequences as defined per
  9 + http://www.unicode.org/reports/tr34/. In short, graphemes
  10 + also contain multiple characters that are "perceived as
  11 + a single character" by readers.
6 12
7 13 For working with raw binaries, use Erlang's :binary
8 14 module.
@@ -90,7 +96,6 @@ defmodule String do
90 96 def printable?(<<>>), do: true
91 97 def printable?(_), do: false
92 98
93   -
94 99 @doc """
95 100 Divides a string into sub strings based on a pattern,
96 101 returning a list of these sub strings. The pattern can
@@ -285,18 +290,47 @@ defmodule String do
285 290 String.codepoints("ἅἪῼ") #=> ["ἅ","Ἢ","ῼ"]
286 291
287 292 """
288   - def codepoints(string) do
289   - do_codepoints(codepoint(string))
290   - end
  293 + defdelegate codepoints(string), to: String.Unicode
291 294
292   - defp do_codepoints({char, rest}) do
293   - [char|do_codepoints(codepoint(rest))]
294   - end
  295 + @doc """
  296 + Returns the next codepoint in a String.
  297 +
  298 + The result is a tuple with the codepoint and the
  299 + remainder of the string or `:no_codepoint` in case
  300 + the String reached its end.
  301 +
  302 + ## Examples
  303 +
  304 + String.next_codepoint("josé") #=> { "j", "osé" }
  305 +
  306 + """
  307 + defdelegate next_codepoint(string), to: String.Unicode
  308 +
  309 + @doc """
  310 + Returns Unicode graphemes in the string.
  311 +
  312 + ## Examples
  313 + String.graphemes("Ā̀stute") # => ["Ā̀","s","t","u","t","e"]
295 314
296   - defp do_codepoints(:no_codepoint), do: []
  315 + """
  316 + defdelegate graphemes(string), to: String.Unicode
297 317
298 318 @doc """
299   - Returns the first codepoint from an utf8 string.
  319 + Returns the next grapheme in a String.
  320 +
  321 + The result is a tuple with the grapheme and the
  322 + remainder of the string or `:no_grapheme` in case
  323 + the String reached its end.
  324 +
  325 + ## Examples
  326 +
  327 + String.next_grapheme("josé") #=> { "j", "osé" }
  328 +
  329 + """
  330 + defdelegate next_grapheme(string), to: String.Unicode
  331 +
  332 + @doc """
  333 + Returns the first grapheme from an utf8 string.
300 334
301 335 ## Examples
302 336
@@ -305,14 +339,14 @@ defmodule String do
305 339
306 340 """
307 341 def first(string) do
308   - case codepoint(string) do
  342 + case next_grapheme(string) do
309 343 { char, _ } -> char
310   - :no_codepoint -> ""
  344 + :no_grapheme -> ""
311 345 end
312 346 end
313 347
314 348 @doc """
315   - Returns the last codepoint from an utf8 string.
  349 + Returns the last grapheme from an utf8 string.
316 350
317 351 ## Examples
318 352
@@ -321,17 +355,17 @@ defmodule String do
321 355
322 356 """
323 357 def last(string) do
324   - do_last(codepoint(string), "")
  358 + do_last(next_grapheme(string), "")
325 359 end
326 360
327 361 defp do_last({char, rest}, _) do
328   - do_last(codepoint(rest), char)
  362 + do_last(next_grapheme(rest), char)
329 363 end
330 364
331   - defp do_last(:no_codepoint, last_char), do: last_char
  365 + defp do_last(:no_grapheme, last_char), do: last_char
332 366
333 367 @doc """
334   - Returns the number of codepoint in an utf8 string.
  368 + Returns the number of unicode graphemes in an utf8 string.
335 369
336 370 ## Examples
337 371
@@ -340,17 +374,17 @@ defmodule String do
340 374
341 375 """
342 376 def length(string) do
343   - do_length(codepoint(string))
  377 + do_length(next_grapheme(string))
344 378 end
345 379
346 380 defp do_length({_, rest}) do
347   - 1 + do_length(codepoint(rest))
  381 + 1 + do_length(next_grapheme(rest))
348 382 end
349 383
350   - defp do_length(:no_codepoint), do: 0
  384 + defp do_length(:no_grapheme), do: 0
351 385
352 386 @doc """
353   - Returns the codepoint in the `position` of the given utf8 `string`.
  387 + Returns the grapheme in the `position` of the given utf8 `string`.
354 388 If `position` is greater than `string` length, then it returns `nil`.
355 389
356 390 ## Examples
@@ -359,51 +393,28 @@ defmodule String do
359 393 String.at("elixir", 1) #=> "l"
360 394 String.at("elixir", 10) #=> nil
361 395 String.at("elixir", -1) #=> "r"
362   - String.at("elixir", -10) #=> "nil"
  396 + String.at("elixir", -10) #=> nil
363 397
364 398 """
365 399 def at(string, position) when position >= 0 do
366   - do_at(codepoint(string), position, 0)
  400 + do_at(next_grapheme(string), position, 0)
367 401 end
368 402
369 403 def at(string, position) when position < 0 do
370   - real_pos = do_length(codepoint(string)) - abs(position)
  404 + real_pos = do_length(next_grapheme(string)) - abs(position)
371 405 case real_pos >= 0 do
372   - true -> do_at(codepoint(string), real_pos, 0)
  406 + true -> do_at(next_grapheme(string), real_pos, 0)
373 407 false -> ""
374 408 end
375 409 end
376 410
377 411 defp do_at({_ , rest}, desired_pos, current_pos) when desired_pos > current_pos do
378   - do_at(codepoint(rest), desired_pos, current_pos + 1)
  412 + do_at(next_grapheme(rest), desired_pos, current_pos + 1)
379 413 end
380 414
381 415 defp do_at({char, _}, desired_pos, current_pos) when desired_pos == current_pos do
382 416 char
383 417 end
384 418
385   - defp do_at(:no_codepoint, _, _), do: ""
386   -
387   - # Private implementation which returns the first codepoint
388   - # of any given utf8 string and the rest of it
389   - # If an empty string is given, :no_codepoint is returned.
390   - defp codepoint(<<194, char, rest :: binary>>)
391   - when char in 161..191,
392   - do: { <<194, char>>, rest }
393   -
394   - defp codepoint(<<first, char, rest :: binary>>)
395   - when first in 195..223 and char in 128..191,
396   - do: { <<first, char>>, rest }
397   -
398   - defp codepoint(<<first, second, char, rest :: binary>>)
399   - when first == 224 and second in 160..191 and char in 128..191,
400   - do: { <<first, second, char>>, rest }
401   -
402   - defp codepoint(<<first, second, char, rest :: binary>>)
403   - when first in 225..239 and second in 128..191 and char in 128..191,
404   - do: { <<first, second, char>>, rest }
405   -
406   - defp codepoint(<<other, rest :: binary>>), do: { <<other>>, rest }
407   -
408   - defp codepoint(<<>>), do: :no_codepoint
  419 + defp do_at(:no_grapheme, _, _), do: nil
409 420 end
420 lib/elixir/priv/NamedSequences.txt
... ... @@ -0,0 +1,420 @@
  1 +LATIN CAPITAL LETTER A WITH MACRON AND GRAVE;0100 0300
  2 +LATIN SMALL LETTER A WITH MACRON AND GRAVE;0101 0300
  3 +LATIN CAPITAL LETTER E WITH VERTICAL LINE BELOW;0045 0329
  4 +LATIN SMALL LETTER E WITH VERTICAL LINE BELOW;0065 0329
  5 +LATIN CAPITAL LETTER E WITH VERTICAL LINE BELOW AND GRAVE;00C8 0329
  6 +LATIN SMALL LETTER E WITH VERTICAL LINE BELOW AND GRAVE;00E8 0329
  7 +LATIN CAPITAL LETTER E WITH VERTICAL LINE BELOW AND ACUTE;00C9 0329
  8 +LATIN SMALL LETTER E WITH VERTICAL LINE BELOW AND ACUTE;00E9 0329
  9 +LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND MACRON;00CA 0304
  10 +LATIN SMALL LETTER E WITH CIRCUMFLEX AND MACRON;00EA 0304
  11 +LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND CARON;00CA 030C
  12 +LATIN SMALL LETTER E WITH CIRCUMFLEX AND CARON;00EA 030C
  13 +LATIN CAPITAL LETTER I WITH MACRON AND GRAVE;012A 0300
  14 +LATIN SMALL LETTER I WITH MACRON AND GRAVE;012B 0300
  15 +LATIN SMALL LETTER I WITH DOT ABOVE AND ACUTE;0069 0307 0301
  16 +LATIN SMALL LETTER NG WITH TILDE ABOVE;006E 0360 0067
  17 +LATIN CAPITAL LETTER O WITH VERTICAL LINE BELOW;004F 0329
  18 +LATIN SMALL LETTER O WITH VERTICAL LINE BELOW;006F 0329
  19 +LATIN CAPITAL LETTER O WITH VERTICAL LINE BELOW AND GRAVE;00D2 0329
  20 +LATIN SMALL LETTER O WITH VERTICAL LINE BELOW AND GRAVE;00F2 0329
  21 +LATIN CAPITAL LETTER O WITH VERTICAL LINE BELOW AND ACUTE;00D3 0329
  22 +LATIN SMALL LETTER O WITH VERTICAL LINE BELOW AND ACUTE;00F3 0329
  23 +LATIN CAPITAL LETTER S WITH VERTICAL LINE BELOW;0053 0329
  24 +LATIN SMALL LETTER S WITH VERTICAL LINE BELOW;0073 0329
  25 +LATIN CAPITAL LETTER U WITH MACRON AND GRAVE;016A 0300
  26 +LATIN SMALL LETTER U WITH MACRON AND GRAVE;016B 0300
  27 +LATIN CAPITAL LETTER A WITH OGONEK AND ACUTE;0104 0301
  28 +LATIN SMALL LETTER A WITH OGONEK AND ACUTE;0105 0301
  29 +LATIN CAPITAL LETTER A WITH OGONEK AND TILDE;0104 0303
  30 +LATIN SMALL LETTER A WITH OGONEK AND TILDE;0105 0303
  31 +LATIN CAPITAL LETTER E WITH OGONEK AND ACUTE;0118 0301
  32 +LATIN SMALL LETTER E WITH OGONEK AND ACUTE;0119 0301
  33 +LATIN CAPITAL LETTER E WITH OGONEK AND TILDE;0118 0303
  34 +LATIN SMALL LETTER E WITH OGONEK AND TILDE;0119 0303
  35 +LATIN CAPITAL LETTER E WITH DOT ABOVE AND ACUTE;0116 0301
  36 +LATIN SMALL LETTER E WITH DOT ABOVE AND ACUTE;0117 0301
  37 +LATIN CAPITAL LETTER E WITH DOT ABOVE AND TILDE;0116 0303
  38 +LATIN SMALL LETTER E WITH DOT ABOVE AND TILDE;0117 0303
  39 +LATIN SMALL LETTER I WITH DOT ABOVE AND GRAVE;0069 0307 0300
  40 +LATIN SMALL LETTER I WITH DOT ABOVE AND TILDE;0069 0307 0303
  41 +LATIN CAPITAL LETTER I WITH OGONEK AND ACUTE;012E 0301
  42 +LATIN SMALL LETTER I WITH OGONEK AND DOT ABOVE AND ACUTE;012F 0307 0301
  43 +LATIN CAPITAL LETTER I WITH OGONEK AND TILDE;012E 0303
  44 +LATIN SMALL LETTER I WITH OGONEK AND DOT ABOVE AND TILDE;012F 0307 0303
  45 +LATIN CAPITAL LETTER J WITH TILDE;004A 0303
  46 +LATIN SMALL LETTER J WITH DOT ABOVE AND TILDE;006A 0307 0303
  47 +LATIN CAPITAL LETTER L WITH TILDE;004C 0303
  48 +LATIN SMALL LETTER L WITH TILDE;006C 0303
  49 +LATIN CAPITAL LETTER M WITH TILDE;004D 0303
  50 +LATIN SMALL LETTER M WITH TILDE;006D 0303
  51 +LATIN CAPITAL LETTER R WITH TILDE;0052 0303
  52 +LATIN SMALL LETTER R WITH TILDE;0072 0303
  53 +LATIN CAPITAL LETTER U WITH OGONEK AND ACUTE;0172 0301
  54 +LATIN SMALL LETTER U WITH OGONEK AND ACUTE;0173 0301
  55 +LATIN CAPITAL LETTER U WITH OGONEK AND TILDE;0172 0303
  56 +LATIN SMALL LETTER U WITH OGONEK AND TILDE;0173 0303
  57 +LATIN CAPITAL LETTER U WITH MACRON AND ACUTE;016A 0301
  58 +LATIN SMALL LETTER U WITH MACRON AND ACUTE;016B 0301
  59 +LATIN CAPITAL LETTER U WITH MACRON AND TILDE;016A 0303
  60 +LATIN SMALL LETTER U WITH MACRON AND TILDE;016B 0303
  61 +LATIN SMALL LETTER AE WITH GRAVE;00E6 0300
  62 +LATIN SMALL LETTER OPEN O WITH GRAVE;0254 0300
  63 +LATIN SMALL LETTER OPEN O WITH ACUTE;0254 0301
  64 +LATIN SMALL LETTER TURNED V WITH GRAVE;028C 0300
  65 +LATIN SMALL LETTER TURNED V WITH ACUTE;028C 0301
  66 +LATIN SMALL LETTER SCHWA WITH GRAVE;0259 0300
  67 +LATIN SMALL LETTER SCHWA WITH ACUTE;0259 0301
  68 +LATIN SMALL LETTER HOOKED SCHWA WITH GRAVE;025A 0300
  69 +LATIN SMALL LETTER HOOKED SCHWA WITH ACUTE;025A 0301
  70 +BENGALI LETTER KHINYA;0995 09CD 09B7
  71 +TAMIL CONSONANT K; 0B95 0BCD
  72 +TAMIL CONSONANT NG; 0B99 0BCD
  73 +TAMIL CONSONANT C; 0B9A 0BCD
  74 +TAMIL CONSONANT NY; 0B9E 0BCD
  75 +TAMIL CONSONANT TT; 0B9F 0BCD
  76 +TAMIL CONSONANT NN; 0BA3 0BCD
  77 +TAMIL CONSONANT T; 0BA4 0BCD
  78 +TAMIL CONSONANT N; 0BA8 0BCD
  79 +TAMIL CONSONANT P; 0BAA 0BCD
  80 +TAMIL CONSONANT M; 0BAE 0BCD
  81 +TAMIL CONSONANT Y; 0BAF 0BCD
  82 +TAMIL CONSONANT R; 0BB0 0BCD
  83 +TAMIL CONSONANT L; 0BB2 0BCD
  84 +TAMIL CONSONANT V; 0BB5 0BCD
  85 +TAMIL CONSONANT LLL;0BB4 0BCD
  86 +TAMIL CONSONANT LL; 0BB3 0BCD
  87 +TAMIL CONSONANT RR; 0BB1 0BCD
  88 +TAMIL CONSONANT NNN;0BA9 0BCD
  89 +TAMIL CONSONANT J; 0B9C 0BCD
  90 +TAMIL CONSONANT SH; 0BB6 0BCD
  91 +TAMIL CONSONANT SS; 0BB7 0BCD
  92 +TAMIL CONSONANT S; 0BB8 0BCD
  93 +TAMIL CONSONANT H; 0BB9 0BCD
  94 +TAMIL CONSONANT KSS;0B95 0BCD 0BB7 0BCD
  95 +TAMIL SYLLABLE KAA; 0B95 0BBE
  96 +TAMIL SYLLABLE KI; 0B95 0BBF
  97 +TAMIL SYLLABLE KII; 0B95 0BC0
  98 +TAMIL SYLLABLE KU; 0B95 0BC1
  99 +TAMIL SYLLABLE KUU; 0B95 0BC2
  100 +TAMIL SYLLABLE KE; 0B95 0BC6
  101 +TAMIL SYLLABLE KEE; 0B95 0BC7
  102 +TAMIL SYLLABLE KAI; 0B95 0BC8
  103 +TAMIL SYLLABLE KO; 0B95 0BCA
  104 +TAMIL SYLLABLE KOO; 0B95 0BCB
  105 +TAMIL SYLLABLE KAU; 0B95 0BCC
  106 +TAMIL SYLLABLE NGAA; 0B99 0BBE
  107 +TAMIL SYLLABLE NGI; 0B99 0BBF
  108 +TAMIL SYLLABLE NGII; 0B99 0BC0
  109 +TAMIL SYLLABLE NGU; 0B99 0BC1
  110 +TAMIL SYLLABLE NGUU; 0B99 0BC2
  111 +TAMIL SYLLABLE NGE; 0B99 0BC6
  112 +TAMIL SYLLABLE NGEE; 0B99 0BC7
  113 +TAMIL SYLLABLE NGAI; 0B99 0BC8
  114 +TAMIL SYLLABLE NGO; 0B99 0BCA
  115 +TAMIL SYLLABLE NGOO; 0B99 0BCB
  116 +TAMIL SYLLABLE NGAU; 0B99 0BCC
  117 +TAMIL SYLLABLE CI; 0B9A 0BBF
  118 +TAMIL SYLLABLE CII; 0B9A 0BC0
  119 +TAMIL SYLLABLE CU; 0B9A 0BC1
  120 +TAMIL SYLLABLE CUU; 0B9A 0BC2
  121 +TAMIL SYLLABLE CE; 0B9A 0BC6
  122 +TAMIL SYLLABLE CEE; 0B9A 0BC7
  123 +TAMIL SYLLABLE CAI; 0B9A 0BC8
  124 +TAMIL SYLLABLE CO; 0B9A 0BCA
  125 +TAMIL SYLLABLE COO; 0B9A 0BCB
  126 +TAMIL SYLLABLE CAU; 0B9A 0BCC
  127 +TAMIL SYLLABLE NYAA; 0B9E 0BBE
  128 +TAMIL SYLLABLE NYI; 0B9E 0BBF
  129 +TAMIL SYLLABLE NYII; 0B9E 0BC0
  130 +TAMIL SYLLABLE NYU; 0B9E 0BC1
  131 +TAMIL SYLLABLE NYUU; 0B9E 0BC2
  132 +TAMIL SYLLABLE NYE; 0B9E 0BC6
  133 +TAMIL SYLLABLE NYEE; 0B9E 0BC7
  134 +TAMIL SYLLABLE NYAI; 0B9E 0BC8
  135 +TAMIL SYLLABLE NYO; 0B9E 0BCA
  136 +TAMIL SYLLABLE NYOO; 0B9E 0BCB
  137 +TAMIL SYLLABLE NYAU; 0B9E 0BCC
  138 +TAMIL SYLLABLE TTAA; 0B9F 0BBE
  139 +TAMIL SYLLABLE TTI; 0B9F 0BBF
  140 +TAMIL SYLLABLE TTII; 0B9F 0BC0
  141 +TAMIL SYLLABLE TTU; 0B9F 0BC1
  142 +TAMIL SYLLABLE TTUU; 0B9F 0BC2
  143 +TAMIL SYLLABLE TTE; 0B9F 0BC6
  144 +TAMIL SYLLABLE TTEE; 0B9F 0BC7
  145 +TAMIL SYLLABLE TTAI; 0B9F 0BC8
  146 +TAMIL SYLLABLE TTO; 0B9F 0BCA
  147 +TAMIL SYLLABLE TTOO; 0B9F 0BCB
  148 +TAMIL SYLLABLE TTAU; 0B9F 0BCC
  149 +TAMIL SYLLABLE NNAA; 0BA3 0BBE
  150 +TAMIL SYLLABLE NNI; 0BA3 0BBF
  151 +TAMIL SYLLABLE NNII; 0BA3 0BC0
  152 +TAMIL SYLLABLE NNU; 0BA3 0BC1
  153 +TAMIL SYLLABLE NNUU; 0BA3 0BC2
  154 +TAMIL SYLLABLE NNE; 0BA3 0BC6
  155 +TAMIL SYLLABLE NNEE; 0BA3 0BC7
  156 +TAMIL SYLLABLE NNAI; 0BA3 0BC8
  157 +TAMIL SYLLABLE NNO; 0BA3 0BCA
  158 +TAMIL SYLLABLE NNOO; 0BA3 0BCB
  159 +TAMIL SYLLABLE NNAU; 0BA3 0BCC
  160 +TAMIL SYLLABLE TAA; 0BA4 0BBE
  161 +TAMIL SYLLABLE TI; 0BA4 0BBF
  162 +TAMIL SYLLABLE TII; 0BA4 0BC0
  163 +TAMIL SYLLABLE TU; 0BA4 0BC1
  164 +TAMIL SYLLABLE TUU; 0BA4 0BC2
  165 +TAMIL SYLLABLE TE; 0BA4 0BC6
  166 +TAMIL SYLLABLE TEE; 0BA4 0BC7
  167 +TAMIL SYLLABLE TAI; 0BA4 0BC8
  168 +TAMIL SYLLABLE TO; 0BA4 0BCA
  169 +TAMIL SYLLABLE TOO; 0BA4 0BCB
  170 +TAMIL SYLLABLE TAU; 0BA4 0BCC
  171 +TAMIL SYLLABLE NAA; 0BA8 0BBE
  172 +TAMIL SYLLABLE NI; 0BA8 0BBF
  173 +TAMIL SYLLABLE NII; 0BA8 0BC0
  174 +TAMIL SYLLABLE NU; 0BA8 0BC1
  175 +TAMIL SYLLABLE NUU; 0BA8 0BC2
  176 +TAMIL SYLLABLE NE; 0BA8 0BC6
  177 +TAMIL SYLLABLE NEE; 0BA8 0BC7
  178 +TAMIL SYLLABLE NAI; 0BA8 0BC8
  179 +TAMIL SYLLABLE NO; 0BA8 0BCA
  180 +TAMIL SYLLABLE NOO; 0BA8 0BCB
  181 +TAMIL SYLLABLE NAU; 0BA8 0BCC
  182 +TAMIL SYLLABLE PAA; 0BAA 0BBE
  183 +TAMIL SYLLABLE PI; 0BAA 0BBF
  184 +TAMIL SYLLABLE PII; 0BAA 0BC0
  185 +TAMIL SYLLABLE PU; 0BAA 0BC1
  186 +TAMIL SYLLABLE PUU; 0BAA 0BC2
  187 +TAMIL SYLLABLE PE; 0BAA 0BC6
  188 +TAMIL SYLLABLE PEE; 0BAA 0BC7
  189 +TAMIL SYLLABLE PAI; 0BAA 0BC8
  190 +TAMIL SYLLABLE PO; 0BAA 0BCA
  191 +TAMIL SYLLABLE POO; 0BAA 0BCB
  192 +TAMIL SYLLABLE PAU; 0BAA 0BCC
  193 +TAMIL SYLLABLE MAA; 0BAE 0BBE
  194 +TAMIL SYLLABLE MI; 0BAE 0BBF
  195 +TAMIL SYLLABLE MII; 0BAE 0BC0
  196 +TAMIL SYLLABLE MU; 0BAE 0BC1
  197 +TAMIL SYLLABLE MUU; 0BAE 0BC2
  198 +TAMIL SYLLABLE ME; 0BAE 0BC6
  199 +TAMIL SYLLABLE MEE; 0BAE 0BC7
  200 +TAMIL SYLLABLE MAI; 0BAE 0BC8
  201 +TAMIL SYLLABLE MO; 0BAE 0BCA
  202 +TAMIL SYLLABLE MOO; 0BAE 0BCB
  203 +TAMIL SYLLABLE MAU; 0BAE 0BCC
  204 +TAMIL SYLLABLE YAA; 0BAF 0BBE
  205 +TAMIL SYLLABLE YI; 0BAF 0BBF
  206 +TAMIL SYLLABLE YII; 0BAF 0BC0
  207 +TAMIL SYLLABLE YU; 0BAF 0BC1
  208 +TAMIL SYLLABLE YUU; 0BAF 0BC2
  209 +TAMIL SYLLABLE YE; 0BAF 0BC6
  210 +TAMIL SYLLABLE YEE; 0BAF 0BC7
  211 +TAMIL SYLLABLE YAI; 0BAF 0BC8
  212 +TAMIL SYLLABLE YO; 0BAF 0BCA
  213 +TAMIL SYLLABLE YOO; 0BAF 0BCB
  214 +TAMIL SYLLABLE YAU; 0BAF 0BCC
  215 +TAMIL SYLLABLE RAA; 0BB0 0BBE
  216 +TAMIL SYLLABLE RI; 0BB0 0BBF
  217 +TAMIL SYLLABLE RII; 0BB0 0BC0
  218 +TAMIL SYLLABLE RU; 0BB0 0BC1
  219 +TAMIL SYLLABLE RUU; 0BB0 0BC2
  220 +TAMIL SYLLABLE RE; 0BB0 0BC6
  221 +TAMIL SYLLABLE REE; 0BB0 0BC7
  222 +TAMIL SYLLABLE RAI; 0BB0 0BC8
  223 +TAMIL SYLLABLE RO; 0BB0 0BCA
  224 +TAMIL SYLLABLE ROO; 0BB0 0BCB
  225 +TAMIL SYLLABLE RAU; 0BB0 0BCC
  226 +TAMIL SYLLABLE LAA; 0BB2 0BBE
  227 +TAMIL SYLLABLE LI; 0BB2 0BBF
  228 +TAMIL SYLLABLE LII; 0BB2 0BC0
  229 +TAMIL SYLLABLE LU; 0BB2 0BC1
  230 +TAMIL SYLLABLE LUU; 0BB2 0BC2
  231 +TAMIL SYLLABLE LE; 0BB2 0BC6
  232 +TAMIL SYLLABLE LEE; 0BB2 0BC7
  233 +TAMIL SYLLABLE LAI; 0BB2 0BC8
  234 +TAMIL SYLLABLE LO; 0BB2 0BCA
  235 +TAMIL SYLLABLE LOO; 0BB2 0BCB
  236 +TAMIL SYLLABLE LAU; 0BB2 0BCC
  237 +TAMIL SYLLABLE VAA; 0BB5 0BBE
  238 +TAMIL SYLLABLE VI; 0BB5 0BBF
  239 +TAMIL SYLLABLE VII; 0BB5 0BC0
  240 +TAMIL SYLLABLE VU; 0BB5 0BC1
  241 +TAMIL SYLLABLE VUU; 0BB5 0BC2
  242 +TAMIL SYLLABLE VE; 0BB5 0BC6
  243 +TAMIL SYLLABLE VEE; 0BB5 0BC7
  244 +TAMIL SYLLABLE VAI; 0BB5 0BC8
  245 +TAMIL SYLLABLE VO; 0BB5 0BCA
  246 +TAMIL SYLLABLE VOO; 0BB5 0BCB
  247 +TAMIL SYLLABLE VAU; 0BB5 0BCC
  248 +TAMIL SYLLABLE LLLAA; 0BB4 0BBE
  249 +TAMIL SYLLABLE LLLI; 0BB4 0BBF
  250 +TAMIL SYLLABLE LLLII; 0BB4 0BC0
  251 +TAMIL SYLLABLE LLLU; 0BB4 0BC1
  252 +TAMIL SYLLABLE LLLUU; 0BB4 0BC2
  253 +TAMIL SYLLABLE LLLE; 0BB4 0BC6
  254 +TAMIL SYLLABLE LLLEE; 0BB4 0BC7
  255 +TAMIL SYLLABLE LLLAI; 0BB4 0BC8
  256 +TAMIL SYLLABLE LLLO; 0BB4 0BCA
  257 +TAMIL SYLLABLE LLLOO; 0BB4 0BCB
  258 +TAMIL SYLLABLE LLLAU; 0BB4 0BCC
  259 +TAMIL SYLLABLE LLAA; 0BB3 0BBE
  260 +TAMIL SYLLABLE LLI; 0BB3 0BBF
  261 +TAMIL SYLLABLE LLII; 0BB3 0BC0
  262 +TAMIL SYLLABLE LLU; 0BB3 0BC1
  263 +TAMIL SYLLABLE LLUU; 0BB3 0BC2
  264 +TAMIL SYLLABLE LLE; 0BB3 0BC6
  265 +TAMIL SYLLABLE LLEE; 0BB3 0BC7
  266 +TAMIL SYLLABLE LLAI; 0BB3 0BC8
  267 +TAMIL SYLLABLE LLO; 0BB3 0BCA
  268 +TAMIL SYLLABLE LLOO; 0BB3 0BCB
  269 +TAMIL SYLLABLE LLAU; 0BB3 0BCC
  270 +TAMIL SYLLABLE RRAA; 0BB1 0BBE
  271 +TAMIL SYLLABLE RRI; 0BB1 0BBF
  272 +TAMIL SYLLABLE RRII; 0BB1 0BC0
  273 +TAMIL SYLLABLE RRU; 0BB1 0BC1
  274 +TAMIL SYLLABLE RRUU; 0BB1 0BC2
  275 +TAMIL SYLLABLE RRE; 0BB1 0BC6
  276 +TAMIL SYLLABLE RREE; 0BB1 0BC7
  277 +TAMIL SYLLABLE RRAI; 0BB1 0BC8
  278 +TAMIL SYLLABLE RRO; 0BB1 0BCA
  279 +TAMIL SYLLABLE RROO; 0BB1 0BCB
  280 +TAMIL SYLLABLE RRAU; 0BB1 0BCC
  281 +TAMIL SYLLABLE NNNAA; 0BA9 0BBE
  282 +TAMIL SYLLABLE NNNI; 0BA9 0BBF
  283 +TAMIL SYLLABLE NNNII; 0BA9 0BC0
  284 +TAMIL SYLLABLE NNNU; 0BA9 0BC1
  285 +TAMIL SYLLABLE NNNUU; 0BA9 0BC2
  286 +TAMIL SYLLABLE NNNE; 0BA9 0BC6
  287 +TAMIL SYLLABLE NNNEE; 0BA9 0BC7
  288 +TAMIL SYLLABLE NNNAI; 0BA9 0BC8
  289 +TAMIL SYLLABLE NNNO; 0BA9 0BCA
  290 +TAMIL SYLLABLE NNNOO; 0BA9 0BCB
  291 +TAMIL SYLLABLE NNNAU; 0BA9 0BCC
  292 +TAMIL SYLLABLE JAA; 0B9C 0BBE
  293 +TAMIL SYLLABLE JI; 0B9C 0BBF
  294 +TAMIL SYLLABLE JII; 0B9C 0BC0
  295 +TAMIL SYLLABLE JU; 0B9C 0BC1
  296 +TAMIL SYLLABLE JUU; 0B9C 0BC2
  297 +TAMIL SYLLABLE JE; 0B9C 0BC6
  298 +TAMIL SYLLABLE JEE; 0B9C 0BC7
  299 +TAMIL SYLLABLE JAI; 0B9C 0BC8
  300 +TAMIL SYLLABLE JO; 0B9C 0BCA
  301 +TAMIL SYLLABLE JOO; 0B9C 0BCB
  302 +TAMIL SYLLABLE JAU; 0B9C 0BCC
  303 +TAMIL SYLLABLE SHAA; 0BB6 0BBE
  304 +TAMIL SYLLABLE SHI; 0BB6 0BBF
  305 +TAMIL SYLLABLE SHII; 0BB6 0BC0
  306 +TAMIL SYLLABLE SHU; 0BB6 0BC1
  307 +TAMIL SYLLABLE SHUU; 0BB6 0BC2
  308 +TAMIL SYLLABLE SHE; 0BB6 0BC6
  309 +TAMIL SYLLABLE SHEE; 0BB6 0BC7
  310 +TAMIL SYLLABLE SHAI; 0BB6 0BC8
  311 +TAMIL SYLLABLE SHO; 0BB6 0BCA
  312 +TAMIL SYLLABLE SHOO; 0BB6 0BCB
  313 +TAMIL SYLLABLE SHAU; 0BB6 0BCC
  314 +TAMIL SYLLABLE SSAA; 0BB7 0BBE
  315 +TAMIL SYLLABLE SSI; 0BB7 0BBF
  316 +TAMIL SYLLABLE SSII; 0BB7 0BC0
  317 +TAMIL SYLLABLE SSU; 0BB7 0BC1
  318 +TAMIL SYLLABLE SSUU; 0BB7 0BC2
  319 +TAMIL SYLLABLE SSE; 0BB7 0BC6
  320 +TAMIL SYLLABLE SSEE; 0BB7 0BC7
  321 +TAMIL SYLLABLE SSAI; 0BB7 0BC8
  322 +TAMIL SYLLABLE SSO; 0BB7 0BCA
  323 +TAMIL SYLLABLE SSOO; 0BB7 0BCB
  324 +TAMIL SYLLABLE SSAU; 0BB7 0BCC
  325 +TAMIL SYLLABLE SAA; 0BB8 0BBE
  326 +TAMIL SYLLABLE SI; 0BB8 0BBF
  327 +TAMIL SYLLABLE SII; 0BB8 0BC0
  328 +TAMIL SYLLABLE SU; 0BB8 0BC1
  329 +TAMIL SYLLABLE SUU; 0BB8 0BC2
  330 +TAMIL SYLLABLE SE; 0BB8 0BC6
  331 +TAMIL SYLLABLE SEE; 0BB8 0BC7
  332 +TAMIL SYLLABLE SAI; 0BB8 0BC8
  333 +TAMIL SYLLABLE SO; 0BB8 0BCA
  334 +TAMIL SYLLABLE SOO; 0BB8 0BCB
  335 +TAMIL SYLLABLE SAU; 0BB8 0BCC
  336 +TAMIL SYLLABLE HAA; 0BB9 0BBE
  337 +TAMIL SYLLABLE HI; 0BB9 0BBF
  338 +TAMIL SYLLABLE HII; 0BB9 0BC0
  339 +TAMIL SYLLABLE HU; 0BB9 0BC1
  340 +TAMIL SYLLABLE HUU; 0BB9 0BC2
  341 +TAMIL SYLLABLE HE; 0BB9 0BC6
  342 +TAMIL SYLLABLE HEE; 0BB9 0BC7
  343 +TAMIL SYLLABLE HAI; 0BB9 0BC8
  344 +TAMIL SYLLABLE HO; 0BB9 0BCA
  345 +TAMIL SYLLABLE HOO; 0BB9 0BCB
  346 +TAMIL SYLLABLE HAU; 0BB9 0BCC
  347 +TAMIL SYLLABLE KSSA; 0B95 0BCD 0BB7
  348 +TAMIL SYLLABLE KSSAA; 0B95 0BCD 0BB7 0BBE
  349 +TAMIL SYLLABLE KSSI; 0B95 0BCD 0BB7 0BBF
  350 +TAMIL SYLLABLE KSSII; 0B95 0BCD 0BB7 0BC0
  351 +TAMIL SYLLABLE KSSU; 0B95 0BCD 0BB7 0BC1
  352 +TAMIL SYLLABLE KSSUU; 0B95 0BCD 0BB7 0BC2
  353 +TAMIL SYLLABLE KSSE; 0B95 0BCD 0BB7 0BC6
  354 +TAMIL SYLLABLE KSSEE; 0B95 0BCD 0BB7 0BC7
  355 +TAMIL SYLLABLE KSSAI; 0B95 0BCD 0BB7 0BC8
  356 +TAMIL SYLLABLE KSSO; 0B95 0BCD 0BB7 0BCA
  357 +TAMIL SYLLABLE KSSOO; 0B95 0BCD 0BB7 0BCB
  358 +TAMIL SYLLABLE KSSAU; 0B95 0BCD 0BB7 0BCC
  359 +TAMIL SYLLABLE SHRII; 0BB6 0BCD 0BB0 0BC0
  360 +SINHALA CONSONANT SIGN YANSAYA;0DCA 200D 0DBA
  361 +SINHALA CONSONANT SIGN RAKAARAANSAYA;0DCA 200D 0DBB
  362 +SINHALA CONSONANT SIGN REPAYA;0DBB 0DCA 200D
  363 +GEORGIAN LETTER U-BRJGU;10E3 0302
  364 +KHMER CONSONANT SIGN COENG KA;17D2 1780
  365 +KHMER CONSONANT SIGN COENG KHA;17D2 1781
  366 +KHMER CONSONANT SIGN COENG KO;17D2 1782
  367 +KHMER CONSONANT SIGN COENG KHO;17D2 1783
  368 +KHMER CONSONANT SIGN COENG NGO;17D2 1784
  369 +KHMER CONSONANT SIGN COENG CA;17D2 1785
  370 +KHMER CONSONANT SIGN COENG CHA;17D2 1786
  371 +KHMER CONSONANT SIGN COENG CO;17D2 1787
  372 +KHMER CONSONANT SIGN COENG CHO;17D2 1788
  373 +KHMER CONSONANT SIGN COENG NYO;17D2 1789
  374 +KHMER CONSONANT SIGN COENG DA;17D2 178A
  375 +KHMER CONSONANT SIGN COENG TTHA;17D2 178B
  376 +KHMER CONSONANT SIGN COENG DO;17D2 178C
  377 +KHMER CONSONANT SIGN COENG TTHO;17D2 178D
  378 +KHMER CONSONANT SIGN COENG NA;17D2 178E
  379 +KHMER CONSONANT SIGN COENG TA;17D2 178F
  380 +KHMER CONSONANT SIGN COENG THA;17D2 1790
  381 +KHMER CONSONANT SIGN COENG TO;17D2 1791
  382 +KHMER CONSONANT SIGN COENG THO;17D2 1792
  383 +KHMER CONSONANT SIGN COENG NO;17D2 1793
  384 +KHMER CONSONANT SIGN COENG BA;17D2 1794
  385 +KHMER CONSONANT SIGN COENG PHA;17D2 1795
  386 +KHMER CONSONANT SIGN COENG PO;17D2 1796
  387 +KHMER CONSONANT SIGN COENG PHO;17D2 1797
  388 +KHMER CONSONANT SIGN COENG MO;17D2 1798
  389 +KHMER CONSONANT SIGN COENG YO;17D2 1799
  390 +KHMER CONSONANT SIGN COENG RO;17D2 179A
  391 +KHMER CONSONANT SIGN COENG LO;17D2 179B
  392 +KHMER CONSONANT SIGN COENG VO;17D2 179C
  393 +KHMER CONSONANT SIGN COENG SHA;17D2 179D
  394 +KHMER CONSONANT SIGN COENG SSA;17D2 179E
  395 +KHMER CONSONANT SIGN COENG SA;17D2 179F
  396 +KHMER CONSONANT SIGN COENG HA;17D2 17A0
  397 +KHMER CONSONANT SIGN COENG LA;17D2 17A1
  398 +KHMER VOWEL SIGN COENG QA;17D2 17A2
  399 +KHMER INDEPENDENT VOWEL SIGN COENG QU;17D2 17A7
  400 +KHMER INDEPENDENT VOWEL SIGN COENG RY;17D2 17AB
  401 +KHMER INDEPENDENT VOWEL SIGN COENG RYY;17D2 17AC
  402 +KHMER INDEPENDENT VOWEL SIGN COENG QE;17D2 17AF
  403 +KHMER VOWEL SIGN OM;17BB 17C6
  404 +KHMER VOWEL SIGN AAM;17B6 17C6
  405 +HIRAGANA LETTER BIDAKUON NGA;304B 309A
  406 +HIRAGANA LETTER BIDAKUON NGI;304D 309A
  407 +HIRAGANA LETTER BIDAKUON NGU;304F 309A
  408 +HIRAGANA LETTER BIDAKUON NGE;3051 309A
  409 +HIRAGANA LETTER BIDAKUON NGO;3053 309A
  410 +KATAKANA LETTER BIDAKUON NGA;30AB 309A
  411 +KATAKANA LETTER BIDAKUON NGI;30AD 309A
  412 +KATAKANA LETTER BIDAKUON NGU;30AF 309A
  413 +KATAKANA LETTER BIDAKUON NGE;30B1 309A
  414 +KATAKANA LETTER BIDAKUON NGO;30B3 309A
  415 +KATAKANA LETTER AINU CE;30BB 309A
  416 +KATAKANA LETTER AINU TU;30C4 309A
  417 +KATAKANA LETTER AINU TO;30C8 309A
  418 +KATAKANA LETTER AINU P;31F7 309A
  419 +MODIFIER LETTER EXTRA-HIGH EXTRA-LOW CONTOUR TONE BAR;02E5 02E9
  420 +MODIFIER LETTER EXTRA-LOW EXTRA-HIGH CONTOUR TONE BAR;02E9 02E5
78 lib/elixir/priv/unicode.ex
@@ -10,9 +10,9 @@ defmodule String.Unicode do
10 10 :unicode.characters_to_binary([binary_to_integer(codepoint, 16)])
11 11 end
12 12
13   - path = File.expand_path("../UnicodeData.txt", __FILE__)
  13 + data_path = File.expand_path("../UnicodeData.txt", __FILE__)
14 14
15   - codes = Enum.reduce File.iterator!(path), [], fn(line, acc) ->
  15 + codes = Enum.reduce File.iterator!(data_path), [], fn(line, acc) ->
16 16 [ codepoint, _name, _category,
17 17 _class, _bidi, _decomposition,
18 18 _numeric_1, _numeric_2, _numeric_3,
@@ -26,6 +26,15 @@ defmodule String.Unicode do
26 26 end
27 27 end
28 28
  29 + seqs_path = File.expand_path("../NamedSequences.txt", __FILE__)
  30 +
  31 + seqs = Enum.map(File.iterator!(seqs_path), fn(line) ->
  32 + [ _name, codepoints ] = :binary.split(line, ";", [:global])
  33 + codepoints = Enum.filter(:binary.split(codepoints, " ", [:global, :trim]),
  34 + fn(x) -> size(x) > 0 end)
  35 + Enum.map(codepoints, fn(x) -> to_binary.(x) end)
  36 + end)
  37 +
29 38 # Downcase
30 39
31 40 lc { codepoint, _upper, lower } inlist codes, lower != "" do
@@ -59,4 +68,69 @@ defmodule String.Unicode do
59 68 def upcase(<< >>) do
60 69 << >>
61 70 end
  71 +
  72 + # Graphemes
  73 +
  74 + lc codepoints inlist seqs do
  75 + seq_args = quote do: [<< unquote_splicing(codepoints), t :: binary >>]
  76 + seq_code = quote do: {<< unquote_splicing(codepoints) >>, t}
  77 + def :next_grapheme, seq_args, [], do: seq_code
  78 + end
  79 +
  80 + def next_grapheme(<<>>) do
  81 + :no_grapheme
  82 + end
  83 +
  84 + def next_grapheme(binary) when is_binary(binary) do
  85 + case next_codepoint(binary) do
  86 + :no_codepoint -> :no_grapheme
  87 + other -> other
  88 + end
  89 + end
  90 +
  91 + def graphemes(binary) when is_binary(binary) do
  92 + do_graphemes(next_grapheme(binary))
  93 + end
  94 +
  95 + defp do_graphemes({ c, rest }) do
  96 + [c|do_graphemes(next_grapheme(rest))]
  97 + end
  98 +
  99 + defp do_graphemes(:no_grapheme) do
  100 + []
  101 + end
  102 +
  103 + # Codepoints
  104 +
  105 + def next_codepoint(<<194, char, rest :: binary>>)
  106 + when char in 161..191,
  107 + do: { <<194, char>>, rest }
  108 +
  109 + def next_codepoint(<<first, char, rest :: binary>>)
  110 + when first in 195..223 and char in 128..191,
  111 + do: { <<first, char>>, rest }
  112 +
  113 + def next_codepoint(<<first, second, char, rest :: binary>>)
  114 + when first == 224 and second in 160..191 and char in 128..191,
  115 + do: { <<first, second, char>>, rest }
  116 +
  117 + def next_codepoint(<<first, second, char, rest :: binary>>)
  118 + when first in 225..239 and second in 128..191 and char in 128..191,
  119 + do: { <<first, second, char>>, rest }
  120 +
  121 + def next_codepoint(<<other, rest :: binary>>), do: { <<other>>, rest }
  122 +
  123 + def next_codepoint(<<>>), do: :no_codepoint
  124 +
  125 + def codepoints(binary) when is_binary(binary) do
  126 + do_codepoints(next_codepoint(binary))
  127 + end
  128 +
  129 + defp do_codepoints({ c, rest }) do
  130 + [c|do_codepoints(next_codepoint(rest))]
  131 + end
  132 +
  133 + defp do_codepoints(:no_codepoint) do
  134 + []
  135 + end
62 136 end
21 lib/elixir/src/elixir_def.erl
@@ -6,7 +6,7 @@
6 6 reset_last/1,
7 7 wrap_definition/7,
8 8 store_definition/8,
9   - store_each/8,
  9 + store_each/7,
10 10 unwrap_stored_definitions/1,
11 11 format_error/1]).
12 12 -include("elixir.hrl").
@@ -19,7 +19,7 @@ table(Module) -> ?ELIXIR_ATOM_CONCAT([f, Module]).
19 19 build_table(Module) ->
20 20 FunctionTable = table(Module),
21 21 ets:new(FunctionTable, [set, named_table, public]),
22   - ets:insert(FunctionTable, { last, [] }),
  22 + reset_last(Module),
23 23 FunctionTable.
24 24
25 25 delete_table(Module) ->
@@ -81,7 +81,6 @@ store_definition(Kind, Line, Module, Name, Args, Guards, Body, RawS) ->
81 81
82 82 File = TS#elixir_scope.file,
83 83 Table = table(Module),
84   - Stack = TS#elixir_scope.macro,
85 84
86 85 %% Store function
87 86 if
@@ -90,10 +89,10 @@ store_definition(Kind, Line, Module, Name, Args, Guards, Body, RawS) ->
90 89 compile_super(Module, TS),
91 90 CheckClauses = S#elixir_scope.check_clauses,
92 91 store_each(CheckClauses, Kind, File, Location,
93   - Stack, Table, length(Defaults), Function)
  92 + Table, length(Defaults), Function)
94 93 end,
95 94
96   - [store_each(false, Kind, File, Location, Stack, Table, 0,
  95 + [store_each(false, Kind, File, Location, Table, 0,
97 96 function_for_default(Kind, Name, Default)) || Default <- Defaults],
98 97
99 98 { Name, Arity }.
@@ -226,13 +225,13 @@ unwrap_stored_definition([], Exports, Private, Def, Defmacro, Defmacrop, {Functi
226 225
227 226 %% Helpers
228 227
229   -function_for_stored_definition({{Name, Arity}, _, Line, _, [], _, _, Clauses}, {Functions,Tail}) ->
  228 +function_for_stored_definition({{Name, Arity}, _, Line, _, [], _, Clauses}, {Functions,Tail}) ->
230 229 {
231 230 [{ function, Line, Name, Arity, lists:reverse(Clauses) }|Functions],
232 231 Tail
233 232 };
234 233
235   -function_for_stored_definition({{Name, Arity}, _, Line, _, Location, _, _, Clauses}, {Functions,Tail}) ->
  234 +function_for_stored_definition({{Name, Arity}, _, Line, _, Location, _, Clauses}, {Functions,Tail}) ->
236 235 {
237 236 Functions,
238 237 [
@@ -252,22 +251,22 @@ function_for_default(_, Name, { clause, Line, Args, _Guards, _Exprs } = Clause)
252 251 %% This function also checks and emit warnings in case
253 252 %% the kind, of the visibility of the function changes.
254 253
255   -store_each(Check, Kind, File, Location, Stack, Table, Defaults, {function, Line, Name, Arity, Clauses}) ->
  254 +store_each(Check, Kind, File, Location, Table, Defaults, {function, Line, Name, Arity, Clauses}) ->
256 255 case ets:lookup(Table, {Name, Arity}) of
257   - [{{Name, Arity}, StoredKind, _, _, StoredLocation, StoredStack, StoredDefaults, StoredClauses}] ->
  256 + [{{Name, Arity}, StoredKind, _, _, StoredLocation, StoredDefaults, StoredClauses}] ->
258 257 FinalLocation = StoredLocation,
259 258 FinalDefaults = Defaults + StoredDefaults,
260 259 FinalClauses = Clauses ++ StoredClauses,
261 260 check_valid_kind(Line, File, Name, Arity, Kind, StoredKind),
262 261 check_valid_defaults(Line, File, Name, Arity, FinalDefaults),
263   - Check andalso (Stack == StoredStack) andalso check_valid_clause(Line, File, Name, Arity, Table);
  262 + Check andalso check_valid_clause(Line, File, Name, Arity, Table);
264 263 [] ->
265 264 FinalLocation = Location,
266 265 FinalDefaults = Defaults,
267 266 FinalClauses = Clauses,
268 267 Check andalso ets:insert(Table, { last, { Name, Arity } })
269 268 end,
270   - ets:insert(Table, {{Name, Arity}, Kind, Line, File, FinalLocation, Stack, FinalDefaults, FinalClauses}).
  269 + ets:insert(Table, {{Name, Arity}, Kind, Line, File, FinalLocation, FinalDefaults, FinalClauses}).
271 270
272 271 %% Validations
273 272
4 lib/elixir/src/elixir_def_local.erl
@@ -13,7 +13,7 @@ macro_for(_Tuple, _All, nil) -> false;
13 13
14 14 macro_for(Tuple, All, Module) ->
15 15 try ets:lookup(elixir_def:table(Module), Tuple) of
16   - [{Tuple, Kind, Line, _, _, _, _, Clauses}] when Kind == defmacro; All, Kind == defmacrop ->
  16 + [{Tuple, Kind, Line, _, _, _, Clauses}] when Kind == defmacro; All, Kind == defmacrop ->
17 17 get_function(Line, Module, Clauses);
18 18 _ ->
19 19 false
@@ -25,7 +25,7 @@ macro_for(Tuple, All, Module) ->
25 25 function_for(Module, Name, Arity) ->
26 26 Tuple = { Name, Arity },
27 27 case ets:lookup(elixir_def:table(Module), Tuple) of
28   - [{Tuple, _, Line, _, _, _, _, Clauses}] ->
  28 + [{Tuple, _, Line, _, _, _, Clauses}] ->
29 29 get_function(Line, Module, Clauses);
30 30 _ ->
31 31 [_|T] = erlang:get_stacktrace(),
4 lib/elixir/src/elixir_def_overridable.erl
@@ -63,7 +63,7 @@ store(Module, Function, GenerateName) ->
63 63 { Count, [H|T] } = orddict:fetch(Function, Overridable),
64 64 overridable(Module, orddict:store(Function, { Count, T }, Overridable)),
65 65
66   - { { Name, Arity }, Kind, Line, File, Location, Stack, Defaults, Clauses } = H,
  66 + { { Name, Arity }, Kind, Line, File, Location, Defaults, Clauses } = H,
67 67
68 68 { FinalKind, FinalName } = case GenerateName of
69 69 true -> { defp, name(Module, Function, Overridable) };
@@ -72,7 +72,7 @@ store(Module, Function, GenerateName) ->
72 72
73 73 Def = { function, Line, FinalName, Arity, Clauses },
74 74 elixir_def:store_each(false, FinalKind, File, Location,
75   - Stack, elixir_def:table(Module), Defaults, Def).
  75 + elixir_def:table(Module), Defaults, Def).
76 76
77 77 %% Store pending declarations that were not manually made concrete.
78 78
16 lib/elixir/src/elixir_dispatch.erl
@@ -59,8 +59,8 @@ dispatch_import(Line, Name, Args, S, Callback) ->
59 59 { error, internal } ->
60 60 elixir_import:record(import, Tuple, ?BUILTIN, Module),
61 61 elixir_macros:translate({ Name, Line, Args }, S);
62   - { ok, Receiver, Tree } ->
63   - translate_expansion(Line, Tree, Receiver, Name, Arity, S)
  62 + { ok, _Receiver, Tree } ->
  63 + translate_expansion(Line, Tree, S)
64 64 end;
65 65 Receiver ->
66 66 elixir_import:record(import, Tuple, Receiver, Module),
@@ -87,7 +87,7 @@ dispatch_require(Line, Receiver, Name, Args, S, Callback) ->
87 87 { error, internal } ->
88 88 elixir_macros:translate({ Name, Line, Args }, S);
89 89 { ok, Tree } ->
90   - translate_expansion(Line, Tree, Receiver, Name, Arity, S)
  90 + translate_expansion(Line, Tree, S)
91 91 end
92 92 end.
93 93
@@ -167,10 +167,12 @@ expand_macro_named(Line, Receiver, Name, Arity, Args, Module, Requires, SEnv) ->
167 167 Fun = fun Receiver:ProperName/ProperArity,
168 168 expand_macro_fun(Line, Fun, Receiver, Name, Args, Module, Requires, SEnv).
169 169
170   -translate_expansion(Line, Tree, Receiver, Name, Arity, S) ->
171   - NewS = S#elixir_scope{macro=[{Line,Receiver,Name,Arity}|S#elixir_scope.macro]},
172   - { TTree, TS } = elixir_translator:translate_each(elixir_quote:linify(Line, Tree), NewS),
173   - { TTree, TS#elixir_scope{macro=S#elixir_scope.macro} }.
  170 +translate_expansion(Line, Tree, S) ->
  171 + { TR, TS } = elixir_translator:translate_each(
  172 + elixir_quote:linify(Line, Tree),
  173 + S#elixir_scope{check_clauses=false}
  174 + ),
  175 + { TR, TS#elixir_scope{check_clauses=S#elixir_scope.check_clauses} }.
174 176
175 177 %% Helpers
176 178
2  lib/elixir/src/elixir_macros.erl
@@ -207,7 +207,7 @@ translate({defmodule, Line, [Ref, KV]}, S) ->
207 207 { TRef, S }
208 208 end,
209 209
210   - { elixir_module:translate(Line, FRef, Block, S), FS };
  210 + { elixir_module:translate(Line, FRef, Block, S#elixir_scope{check_clauses=true}), FS };
211 211
212 212 translate({Kind, Line, [Call]}, S) when ?FUNS(Kind) ->
213 213 translate({Kind, Line, [Call, nil]}, S);
2  lib/elixir/src/elixir_module.erl
@@ -25,7 +25,6 @@ scope_for_eval(Module, Opts) ->
25 25
26 26 binding_for_eval(Module, Binding) -> [{'_@MODULE',Module}|Binding].
27 27
28   -
29 28 %% TABLE METHODS
30 29
31 30 data_table(Module) ->
@@ -150,6 +149,7 @@ eval_form(Line, Module, Block, Vars, RawS) ->
150 149 { Value, NewS } = elixir_compiler:eval_forms([Block], Line, Temp, Vars, S),
151 150 elixir_def_overridable:store_pending(Module),
152 151 eval_callbacks(Line, Module, before_compile, [Module], NewS),
  152 + elixir_def_overridable:store_pending(Module),
153 153 Value.
154 154
155 155 %% Return the form with exports and function declarations.
5 lib/elixir/src/elixir_scope.erl
@@ -79,7 +79,7 @@ filename(Other) -> element(3, Other).
79 79
80 80 serialize(S) ->
81 81 elixir_tree_helpers:abstract_syntax(
82   - { S#elixir_scope.file, S#elixir_scope.functions, S#elixir_scope.check_clauses, S#elixir_scope.macro,
  82 + { S#elixir_scope.file, S#elixir_scope.functions, S#elixir_scope.check_clauses,
83 83 S#elixir_scope.requires, S#elixir_scope.macros, S#elixir_scope.aliases, S#elixir_scope.scheduled }
84 84 ).
85 85
@@ -87,12 +87,11 @@ serialize(S) ->
87 87
88 88 deserialize(Tuple) -> deserialize(Tuple, []).
89 89
90   -deserialize({ File, Functions, CheckClauses, Macro, Requires, Macros, Aliases, Scheduled }, Vars) ->
  90 +deserialize({ File, Functions, CheckClauses, Requires, Macros, Aliases, Scheduled }, Vars) ->
91 91 #elixir_scope{
92 92 file=File,
93 93 functions=Functions,
94 94 check_clauses=CheckClauses,
95   - macro=Macro,
96 95 requires=Requires,
97 96 macros=Macros,
98 97 aliases=Aliases,
34 lib/elixir/test/elixir/module_test.exs
@@ -170,6 +170,16 @@ defmodule ModuleTest do
170 170 end)
171 171 end
172 172
  173 + def __on_definition__(env, kind, name, args, guards, expr) do
  174 + Process.put(env.module, :called)
  175 + assert env.module == ModuleTest.OnDefinition
  176 + assert kind == :def
  177 + assert name == :hello
  178 + assert [{ :foo, _, _ }, { :bar, _ ,_ }] = args
  179 + assert [] = guards
  180 + assert [do: { :+, _, [{ :foo, _, _ }, { :bar, _, _ }] }] = expr
  181 + end
  182 +
173 183 test :on_definition do
174 184 defmodule OnDefinition do
175 185 @on_definition ModuleTest
@@ -182,16 +192,6 @@ defmodule ModuleTest do
182 192 assert Process.get(ModuleTest.OnDefinition) == :called
183 193 end
184 194
185   - def __on_definition__(env, kind, name, args, guards, expr) do
186   - Process.put(env.module, :called)
187   - assert env.module == ModuleTest.OnDefinition
188   - assert kind == :def
189   - assert name == :hello
190   - assert [{ :foo, _, _ }, { :bar, _ ,_ }] = args
191   - assert [] = guards
192   - assert [do: { :+, _, [{ :foo, _, _ }, { :bar, _, _ }] }] = expr
193   - end
194   -
195 195 test :create do
196 196 contents =
197 197 quote do
@@ -201,4 +201,18 @@ defmodule ModuleTest do
201 201 Module.create(ModuleCreateSample, contents, __ENV__)
202 202 assert ModuleCreateSample.world
203 203 end
  204 +
  205 + defmacro __before_compile__(_) do
  206 + quote do
  207 + def constant, do: 1
  208 + defoverridable constant: 0
  209 + end
  210 + end
  211 +
  212 + test :overridable_inside_before_compile do
  213 + defmodule OverridableWithBeforeCompile do
  214 + @before_compile ModuleTest
  215 + end
  216 + assert OverridableWithBeforeCompile.constant == 1
  217 + end
204 218 end
16 lib/elixir/test/elixir/string_test.exs
@@ -97,6 +97,15 @@ defmodule StringTest do
97 97 assert String.codepoints("ϖͲϥЫݎߟΈټϘለДШव׆ש؇؊صلټܗݎޥޘ߉ऌ૫ሏᶆ℆ℙℱ ⅚Ⅷ↠∈⌘①ffi") == ["ϖ","Ͳ","ϥ","Ы","ݎ","ߟ","Έ","ټ","Ϙ","ለ","Д","Ш","व","׆","ש","؇","؊","ص","ل","ټ","ܗ","ݎ","ޥ","ޘ","߉","ऌ","૫","ሏ","ᶆ","℆","ℙ","ℱ"," ","⅚","Ⅷ","↠","∈","⌘","①","ffi"]
98 98 end
99 99
  100 + test :graphemes do
  101 + assert String.graphemes("Ā̀stute") == ["Ā̀","s","t","u","t","e"]
  102 + end
  103 +
  104 + test :next_grapheme do
  105 + assert String.next_grapheme("Ā̀stute") == {"Ā̀","stute"}
  106 + assert String.next_grapheme("") == :no_grapheme
  107 + end
  108 +
100 109 test :first do
101 110 assert String.first("elixir") == "e"
102 111 assert String.first("íelixr") == "í"
@@ -105,6 +114,7 @@ defmodule StringTest do
105 114 assert String.first("ελιξήριο") == "ε"
106 115 assert String.first("סם חיים") == "ס"
107 116 assert String.first("がガちゃ") == "が"
  117 + assert String.first("Ā̀stute") == "Ā̀"
108 118 assert String.first("") == ""
109 119 end
110 120
@@ -117,6 +127,7 @@ defmodule StringTest do
117 127 assert String.last("סם ייםח") == "ח"
118 128 assert String.last("がガちゃ") == "ゃ"
119 129 assert String.last("") == ""
  130 + assert String.last("Ā̀") == "Ā̀"
120 131 end
121 132
122 133 test :length do