-
Notifications
You must be signed in to change notification settings - Fork 4
/
combining_class.ex
155 lines (120 loc) · 3.67 KB
/
combining_class.ex
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
defmodule Unicode.CanonicalCombiningClass do
  @moduledoc """
  Introspection helpers for Unicode canonical combining
  classes, usable with both binaries (Strings) and
  individual codepoints.

  """

  @behaviour Unicode.Property.Behaviour

  alias Unicode.Utils

  # Table of combining class => codepoint ranges, built once at compile time.
  @combining_classes Utils.remove_annotations(Utils.combining_classes())

  @doc """
  Returns the map of Unicode canonical combining classes.

  Each key is a combining class and each value is the
  list of codepoint ranges, expressed as tuples, that
  belong to that class.

  """
  def combining_classes, do: @combining_classes

  # The keys of the class table, captured at compile time.
  @known_combining_classes Map.keys(@combining_classes)

  @doc """
  Returns the list of known Unicode canonical combining
  class names.

  Only the keys of `combining_classes/0` are returned;
  class aliases are not included.

  """
  def known_combining_classes, do: @known_combining_classes

  # Alias table for the "ccc" property. The alias values arrive as
  # strings and are converted to integers before the keys are normalised.
  @combining_class_alias Utils.property_value_alias()
                         |> Map.get("ccc")
                         |> Map.new(fn {name, value} -> {name, String.to_integer(value)} end)
                         |> Utils.downcase_keys_and_remove_whitespace()
                         |> Utils.add_canonical_alias()

  @doc """
  Returns the map of aliases for Unicode canonical
  combining classes.

  An alias is an alternative name for a class. Aliases
  are resolved by `fetch/1` and `get/1` when they are
  given a binary.

  """
  @impl Unicode.Property.Behaviour
  def aliases, do: @combining_class_alias

  @doc """
  Fetches the codepoint ranges of a canonical combining
  class as a list of 2-tuples.

  A binary argument is first downcased and stripped of
  whitespace and then resolved through `aliases/0`. An
  atom or integer argument is looked up directly in
  `combining_classes/0`.

  Returns `{:ok, range_list}` when the class is found
  and `:error` otherwise.

  """
  @impl Unicode.Property.Behaviour
  def fetch(combining_class) when is_binary(combining_class) do
    normalized = Utils.downcase_and_remove_whitespace(combining_class)

    # Fall back to the normalised name itself when no alias is registered.
    resolved = Map.get(aliases(), normalized, normalized)
    Map.fetch(combining_classes(), resolved)
  end

  def fetch(combining_class) when is_atom(combining_class) or is_integer(combining_class) do
    Map.fetch(combining_classes(), combining_class)
  end

  @doc """
  Gets the codepoint ranges of a canonical combining
  class as a list of 2-tuples.

  The argument is resolved exactly as in `fetch/1`.

  Returns the `range_list` when the class is found and
  `nil` otherwise.

  """
  @impl Unicode.Property.Behaviour
  def get(combining_class) do
    with {:ok, ranges} <- fetch(combining_class) do
      ranges
    else
      _ -> nil
    end
  end

  @doc """
  Counts the codepoints that belong to a canonical
  combining class.

  The argument is resolved with `fetch/1`. When the class
  cannot be found, the result of `fetch/1` (`:error`) is
  returned unchanged.

  ## Example

      iex> Unicode.CanonicalCombiningClass.count(230)
      508

  """
  @impl Unicode.Property.Behaviour
  def count(class) do
    case fetch(class) do
      {:ok, ranges} ->
        # Ranges are inclusive at both ends, hence the + 1.
        ranges
        |> Enum.map(fn {first, last} -> last - first + 1 end)
        |> Enum.sum()

      not_found ->
        not_found
    end
  end

  @doc """
  Returns the canonical combining class(es) of a binary
  or a codepoint.

  For a codepoint a single class is returned. A codepoint
  in `0..0x10FFFF` that is not covered by any range in
  `combining_classes/0` has class `0`.

  For a binary the class of every codepoint is computed
  and the distinct classes are returned as a list, in
  order of first appearance.

  """
  def combining_class(string) when is_binary(string) do
    codepoints = String.to_charlist(string)
    classes = Enum.map(codepoints, &combining_class/1)
    Enum.uniq(classes)
  end

  # One clause per class, generated at compile time. The parameter must stay
  # named `codepoint`: the guard built by `Utils.ranges_to_guard_clause/1`
  # presumably refers to it (the helper is defined outside this file).
  for {class, ranges} <- @combining_classes do
    def combining_class(codepoint) when unquote(Utils.ranges_to_guard_clause(ranges)),
      do: unquote(class)
  end

  # Any remaining valid codepoint falls into class 0.
  def combining_class(codepoint)
      when is_integer(codepoint) and codepoint >= 0 and codepoint <= 0x10FFFF,
      do: 0
end