-
Notifications
You must be signed in to change notification settings - Fork 4
/
script.ex
151 lines (117 loc) · 2.98 KB
/
script.ex
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
defmodule Unicode.Script do
  @moduledoc """
  Introspection of Unicode scripts for
  codepoints and binaries (Strings).
  """

  @behaviour Unicode.Property.Behaviour

  alias Unicode.Utils

  # All lookup data below is computed once, at compile time, from the
  # helpers in `Unicode.Utils` and embedded in the compiled module.
  @scripts Utils.remove_annotations(Utils.scripts())

  @known_scripts Map.keys(@scripts)

  # Derived from the "sc" entry of the property value aliases. The
  # `atomize_values` and `downcase_keys_and_remove_whitespace` steps suggest
  # a map of normalised string aliases to script atoms - confirm in
  # `Unicode.Utils`.
  @script_alias Utils.property_value_alias()
                |> Map.get("sc")
                |> Utils.invert_map()
                |> Utils.atomize_values()
                |> Utils.downcase_keys_and_remove_whitespace()
                |> Utils.add_canonical_alias()

  @doc """
  Returns the map of Unicode scripts.

  Each key is a script name and each value
  is the list of codepoint ranges, expressed
  as tuples, that belong to that script.

  """
  def scripts do
    @scripts
  end

  @doc """
  Returns the list of known Unicode
  script names.

  Only the keys of `scripts/0` are returned;
  script aliases are not included.

  """
  def known_scripts do
    @known_scripts
  end

  @doc """
  Returns the map of aliases for
  Unicode scripts.

  An alias is an alternative name by which
  a script may be referenced. `fetch/1` and
  `get/1` consult this map when resolving
  their argument.

  """
  @impl Unicode.Property.Behaviour
  def aliases do
    @script_alias
  end

  @doc """
  Fetches the Unicode ranges for a given
  script as a list of 2-tuples.

  An atom argument is looked up directly as
  a script name. Any other argument is first
  normalised (downcased, whitespace removed)
  and resolved through `aliases/0`.

  Returns either `{:ok, range_list}` or
  `:error`.

  """
  @impl Unicode.Property.Behaviour
  def fetch(script) when is_atom(script), do: Map.fetch(scripts(), script)

  def fetch(script) do
    normalized = Utils.downcase_and_remove_whitespace(script)

    # When no alias matches, the normalised value itself is used as the key.
    resolved = Map.get(aliases(), normalized, normalized)
    Map.fetch(scripts(), resolved)
  end

  @doc """
  Gets the Unicode ranges for a given
  script as a list of 2-tuples.

  The argument is resolved in the same way
  as for `fetch/1`.

  Returns either `range_list` or `nil`.

  """
  @impl Unicode.Property.Behaviour
  def get(script) do
    with {:ok, ranges} <- fetch(script) do
      ranges
    else
      _other -> nil
    end
  end

  @doc """
  Returns the number of characters
  in a given script.

  The count is the sum of the sizes of the
  script's (inclusive) codepoint ranges. If the
  script is not known, `:error` is returned.

  ## Example

      iex> Unicode.Script.count("mongolian")
      168

  """
  @impl Unicode.Property.Behaviour
  def count(script) do
    case fetch(script) do
      {:ok, ranges} ->
        Enum.reduce(ranges, 0, fn {first, last}, total -> total + last - first + 1 end)

      not_found ->
        not_found
    end
  end

  @doc """
  Returns the script name(s) for the
  given binary or codepoint.

  For a codepoint, a single script name is
  returned, or `:unknown` when the codepoint is
  in `0..0x10FFFF` but matches no script.

  For a binary, the result is the list of
  distinct script names of its codepoints, in
  order of first appearance.

  """
  def script(string) when is_binary(string) do
    codepoints = String.to_charlist(string)
    names = Enum.map(codepoints, &script/1)
    Enum.uniq(names)
  end

  # One clause is generated per script at compile time. The parameter is
  # deliberately left named `codepoint`: the guard is produced by
  # `Utils.ranges_to_guard_clause/1`, which presumably refers to that
  # variable by name - confirm before renaming.
  for {name, ranges} <- @scripts do
    def script(codepoint) when unquote(Utils.ranges_to_guard_clause(ranges)) do
      unquote(name)
    end
  end

  # Fallback for valid codepoints that no generated clause matched.
  def script(codepoint) when is_integer(codepoint) and codepoint in 0..0x10FFFF, do: :unknown
end