From 73e5e7ace9b1c745b29e2a500a9e3d937854bfde Mon Sep 17 00:00:00 2001 From: Uvarov Michael Date: Tue, 14 Feb 2012 17:28:18 +0400 Subject: [PATCH] Split files by directories. Merge hrls. Move type information to files from .hrl-files. --- Makefile | 2 +- doc/README.md | 16 +- doc/edoc-info | 4 +- doc/uca/package-summary.md | 3 + doc/{ => uca}/ux_uca_compress.md | 18 +- doc/uca/ux_uca_extract.md | 7 + doc/{ => uca}/ux_uca_options.md | 29 +- doc/{ => uca}/ux_uca_sort_key_binary.md | 18 +- doc/{ => uca}/ux_uca_sort_key_binary_cs.md | 18 +- doc/{ => uca}/ux_uca_sort_key_list.md | 18 +- doc/{ => uca}/ux_uca_sort_key_uncompressed.md | 18 +- doc/uca/ux_uca_utils.md | 94 +++++ doc/unidata/package-summary.md | 3 + doc/utils/package-summary.md | 3 + doc/utils/ux_opt_ranges.md | 59 ++++ doc/{ux_range.md => utils/ux_ranges.md} | 32 +- doc/ux_char.md | 118 +++---- doc/ux_col.md | 333 ------------------ doc/ux_gb.md | 37 ++ doc/ux_math.md | 82 ----- doc/ux_par.md | 83 ----- doc/ux_ranges.md | 50 --- doc/ux_string.md | 257 +++++--------- doc/ux_types.md | 29 ++ doc/ux_uca.md | 149 +++++--- doc/ux_uca_alt.md | 10 - doc/ux_uca_extract.md | 10 - doc/ux_uca_tests.md | 10 - doc/ux_unidata_filelist.md | 156 -------- doc/ux_unidata_parser.md | 146 -------- doc/ux_unidata_parser_allkeys.md | 56 --- doc/ux_unidata_parser_blocks.md | 56 --- doc/ux_unidata_parser_comp_exclusions.md | 56 --- doc/ux_unidata_parser_norm_props.md | 56 --- doc/ux_unidata_parser_unidata.md | 56 --- doc/ux_unidata_server.md | 116 ------ doc/ux_unidata_store.md | 136 ------- doc/ux_unidata_store_sup.md | 58 --- doc/ux_utils.md | 56 --- doc/ux_wb.md | 46 +++ ebin/ux.app | 9 +- rebar.config | 9 +- root.xml | 275 --------------- src/uca/ux.hrl | 1 + src/{ => uca}/ux_uca.hrl | 47 +-- src/{ => uca}/ux_uca_alt.erl | 2 +- src/{ => uca}/ux_uca_compress.erl | 2 - src/{ => uca}/ux_uca_extract.erl | 19 +- src/{ => uca}/ux_uca_options.erl | 0 src/{ => uca}/ux_uca_sort_key_binary.erl | 9 +- src/{ => 
uca}/ux_uca_sort_key_binary_cs.erl | 8 +- src/{ => uca}/ux_uca_sort_key_list.erl | 9 +- .../ux_uca_sort_key_uncompressed.erl | 9 +- .../ux_uca_utils.erl} | 23 +- src/unidata/ux.hrl | 1 + src/{ => unidata}/ux_unidata_filelist.erl | 1 - src/{ => unidata}/ux_unidata_parser.erl | 1 - .../ux_unidata_parser_allkeys.erl | 9 +- .../ux_unidata_parser_blocks.erl | 0 .../ux_unidata_parser_comp_exclusions.erl | 0 ...unidata_parser_grapheme_break_property.erl | 0 .../ux_unidata_parser_norm_props.erl | 0 .../ux_unidata_parser_scripts.erl | 0 .../ux_unidata_parser_unidata.erl | 0 .../ux_unidata_parser_word_break_property.erl | 0 src/{ => unidata}/ux_unidata_server.erl | 1 - src/{ => unidata}/ux_unidata_store.erl | 1 - src/{ => unidata}/ux_unidata_store_sup.erl | 0 src/{ => utils}/ux_opt_ranges.erl | 1 - src/{ => utils}/ux_ranges.erl | 1 - src/ux.hrl | 105 ++++++ src/ux_break.erl | 1 - src/ux_char.erl | 6 +- src/ux_char.hrl | 124 ------- src/ux_gb.erl | 2 +- src/ux_html.erl | 261 -------------- src/ux_string.erl | 4 +- src/ux_string.hrl | 57 --- src/ux_types.erl | 43 +++ src/ux_uca.erl | 61 +++- src/ux_unidata.erl | 2 +- src/ux_unidata.hrl | 30 -- src/ux_unidata_server.hrl | 5 - src/ux_wb.erl | 2 +- start-dev.sh | 2 +- {src => test}/ux_break_tests.erl | 0 {src => test}/ux_char_tests.erl | 0 {src => test}/ux_string_tests.erl | 1 - {src => test}/ux_tests.hrl | 2 + {src => test}/ux_uca_tests.erl | 1 - 90 files changed, 888 insertions(+), 2763 deletions(-) create mode 100644 doc/uca/package-summary.md rename doc/{ => uca}/ux_uca_compress.md (61%) create mode 100644 doc/uca/ux_uca_extract.md rename doc/{ => uca}/ux_uca_options.md (71%) rename doc/{ => uca}/ux_uca_sort_key_binary.md (58%) rename doc/{ => uca}/ux_uca_sort_key_binary_cs.md (57%) rename doc/{ => uca}/ux_uca_sort_key_list.md (58%) rename doc/{ => uca}/ux_uca_sort_key_uncompressed.md (56%) create mode 100644 doc/uca/ux_uca_utils.md create mode 100644 doc/unidata/package-summary.md create mode 100644 
doc/utils/package-summary.md create mode 100644 doc/utils/ux_opt_ranges.md rename doc/{ux_range.md => utils/ux_ranges.md} (62%) delete mode 100644 doc/ux_col.md create mode 100644 doc/ux_gb.md delete mode 100644 doc/ux_math.md delete mode 100644 doc/ux_par.md delete mode 100644 doc/ux_ranges.md create mode 100644 doc/ux_types.md delete mode 100644 doc/ux_uca_alt.md delete mode 100644 doc/ux_uca_extract.md delete mode 100644 doc/ux_uca_tests.md delete mode 100644 doc/ux_unidata_filelist.md delete mode 100644 doc/ux_unidata_parser.md delete mode 100644 doc/ux_unidata_parser_allkeys.md delete mode 100644 doc/ux_unidata_parser_blocks.md delete mode 100644 doc/ux_unidata_parser_comp_exclusions.md delete mode 100644 doc/ux_unidata_parser_norm_props.md delete mode 100644 doc/ux_unidata_parser_unidata.md delete mode 100644 doc/ux_unidata_server.md delete mode 100644 doc/ux_unidata_store.md delete mode 100644 doc/ux_unidata_store_sup.md delete mode 100644 doc/ux_utils.md create mode 100644 doc/ux_wb.md delete mode 100644 root.xml create mode 100644 src/uca/ux.hrl rename src/{ => uca}/ux_uca.hrl (70%) rename src/{ => uca}/ux_uca_alt.erl (98%) rename src/{ => uca}/ux_uca_compress.erl (98%) rename src/{ => uca}/ux_uca_extract.erl (97%) rename src/{ => uca}/ux_uca_options.erl (100%) rename src/{ => uca}/ux_uca_sort_key_binary.erl (93%) rename src/{ => uca}/ux_uca_sort_key_binary_cs.erl (94%) rename src/{ => uca}/ux_uca_sort_key_list.erl (91%) rename src/{ => uca}/ux_uca_sort_key_uncompressed.erl (91%) rename src/{ux_uca_common.hrl => uca/ux_uca_utils.erl} (85%) create mode 100644 src/unidata/ux.hrl rename src/{ => unidata}/ux_unidata_filelist.erl (99%) rename src/{ => unidata}/ux_unidata_parser.erl (99%) rename src/{ => unidata}/ux_unidata_parser_allkeys.erl (98%) rename src/{ => unidata}/ux_unidata_parser_blocks.erl (100%) rename src/{ => unidata}/ux_unidata_parser_comp_exclusions.erl (100%) rename src/{ => unidata}/ux_unidata_parser_grapheme_break_property.erl (100%) rename 
src/{ => unidata}/ux_unidata_parser_norm_props.erl (100%) rename src/{ => unidata}/ux_unidata_parser_scripts.erl (100%) rename src/{ => unidata}/ux_unidata_parser_unidata.erl (100%) rename src/{ => unidata}/ux_unidata_parser_word_break_property.erl (100%) rename src/{ => unidata}/ux_unidata_server.erl (99%) rename src/{ => unidata}/ux_unidata_store.erl (99%) rename src/{ => unidata}/ux_unidata_store_sup.erl (100%) rename src/{ => utils}/ux_opt_ranges.erl (99%) rename src/{ => utils}/ux_ranges.erl (98%) delete mode 100644 src/ux_break.erl delete mode 100644 src/ux_char.hrl delete mode 100644 src/ux_html.erl delete mode 100644 src/ux_string.hrl create mode 100644 src/ux_types.erl delete mode 100644 src/ux_unidata.hrl delete mode 100644 src/ux_unidata_server.hrl rename {src => test}/ux_break_tests.erl (100%) rename {src => test}/ux_char_tests.erl (100%) rename {src => test}/ux_string_tests.erl (99%) rename {src => test}/ux_tests.hrl (86%) rename {src => test}/ux_uca_tests.erl (99%) diff --git a/Makefile b/Makefile index 4e1f9c6..022a954 100644 --- a/Makefile +++ b/Makefile @@ -10,7 +10,7 @@ all: edoc: @$(REBAR) skip_deps=true doc -test: +eunit: @$(REBAR) skip_deps=true eunit clean: diff --git a/doc/README.md b/doc/README.md index 6df71ad..2e0a826 100644 --- a/doc/README.md +++ b/doc/README.md @@ -1,18 +1,24 @@ -

The ux application

+#The ux application# -The ux application -================== +##Packages## -

Modules

+
uca
unidata
utils
+ + + +##Modules## + + -
ux_char
ux_gb
ux_string
ux_types
ux_uca
ux_uca_options
+ux_uca_options +ux_wb diff --git a/doc/edoc-info b/doc/edoc-info index f6f6b3c..50f4f33 100644 --- a/doc/edoc-info +++ b/doc/edoc-info @@ -1,3 +1,3 @@ {application,ux}. -{packages,[]}. -{modules,[ux_char,ux_string,ux_uca,ux_uca_options]}. +{packages,[uca,unidata,utils]}. +{modules,[ux_char,ux_gb,ux_string,ux_types,ux_uca,ux_uca_options,ux_wb]}. diff --git a/doc/uca/package-summary.md b/doc/uca/package-summary.md new file mode 100644 index 0000000..3a0a080 --- /dev/null +++ b/doc/uca/package-summary.md @@ -0,0 +1,3 @@ + + +#Package uca# diff --git a/doc/ux_uca_compress.md b/doc/uca/ux_uca_compress.md similarity index 61% rename from doc/ux_uca_compress.md rename to doc/uca/ux_uca_compress.md index 96d7f72..2280b23 100644 --- a/doc/ux_uca_compress.md +++ b/doc/uca/ux_uca_compress.md @@ -1,33 +1,27 @@ -Module ux_uca_compress -====================== -

Module ux_uca_compress

- +#Module ux_uca_compress# * [Function Index](#index) * [Function Details](#functions) + - -

Function Index

- +##Function Index##
reassign_fun/3
+ - -

Function Details

- +##Function Details## -

reassign_fun/3

- +###reassign_fun/3## diff --git a/doc/uca/ux_uca_extract.md b/doc/uca/ux_uca_extract.md new file mode 100644 index 0000000..601f993 --- /dev/null +++ b/doc/uca/ux_uca_extract.md @@ -0,0 +1,7 @@ + + +#Module ux_uca_extract# + + + + diff --git a/doc/ux_uca_options.md b/doc/uca/ux_uca_options.md similarity index 71% rename from doc/ux_uca_options.md rename to doc/uca/ux_uca_options.md index 6c8c58d..d5ac86f 100644 --- a/doc/ux_uca_options.md +++ b/doc/uca/ux_uca_options.md @@ -1,9 +1,6 @@ -Module ux_uca_options -===================== -

Module ux_uca_options

- +#Module ux_uca_options# * [Description](#description) * [Function Index](#index) * [Function Details](#functions) @@ -12,32 +9,28 @@ Module ux_uca_options This library contains functions for manipulating with a configuration of sorting. + - -

Description

- +##Description## You can use it as: `C = ux_uca_options:get_options(shifted).` And then: -`ux_uca:sort(C, ["string1", "string2", "string3").` - -

Function Index

+`ux_uca:sort(C, ["string1", "string2", "string3"]).` +##Function Index##
get_options/0
get_options/1
get_options/2If you want use this library without import *.hrl, you can create a #uca_options {} record with this function.
+ - -

Function Details

- +##Function Details## -

get_options/0

- +###get_options/0## @@ -46,8 +39,7 @@ a #uca_options {} record with this function. -

get_options/1

- +###get_options/1## @@ -56,8 +48,7 @@ a #uca_options {} record with this function. -

get_options/2

- +###get_options/2## diff --git a/doc/ux_uca_sort_key_binary.md b/doc/uca/ux_uca_sort_key_binary.md similarity index 58% rename from doc/ux_uca_sort_key_binary.md rename to doc/uca/ux_uca_sort_key_binary.md index 6d63dea..d490567 100644 --- a/doc/ux_uca_sort_key_binary.md +++ b/doc/uca/ux_uca_sort_key_binary.md @@ -1,33 +1,27 @@ -Module ux_uca_sort_key_binary -============================= -

Module ux_uca_sort_key_binary

- +#Module ux_uca_sort_key_binary# * [Function Index](#index) * [Function Details](#functions) + - -

Function Index

- +##Function Index##
sort_key/2
+ - -

Function Details

- +##Function Details## -

sort_key/2

- +###sort_key/2## diff --git a/doc/ux_uca_sort_key_binary_cs.md b/doc/uca/ux_uca_sort_key_binary_cs.md similarity index 57% rename from doc/ux_uca_sort_key_binary_cs.md rename to doc/uca/ux_uca_sort_key_binary_cs.md index 45a2f15..4051ab6 100644 --- a/doc/ux_uca_sort_key_binary_cs.md +++ b/doc/uca/ux_uca_sort_key_binary_cs.md @@ -1,33 +1,27 @@ -Module ux_uca_sort_key_binary_cs -================================ -

Module ux_uca_sort_key_binary_cs

- +#Module ux_uca_sort_key_binary_cs# * [Function Index](#index) * [Function Details](#functions) + - -

Function Index

- +##Function Index##
sort_key/2
+ - -

Function Details

- +##Function Details## -

sort_key/2

- +###sort_key/2## diff --git a/doc/ux_uca_sort_key_list.md b/doc/uca/ux_uca_sort_key_list.md similarity index 58% rename from doc/ux_uca_sort_key_list.md rename to doc/uca/ux_uca_sort_key_list.md index 4ff0ef7..7a12ad7 100644 --- a/doc/ux_uca_sort_key_list.md +++ b/doc/uca/ux_uca_sort_key_list.md @@ -1,33 +1,27 @@ -Module ux_uca_sort_key_list -=========================== -

Module ux_uca_sort_key_list

- +#Module ux_uca_sort_key_list# * [Function Index](#index) * [Function Details](#functions) + - -

Function Index

- +##Function Index##
sort_key/2
+ - -

Function Details

- +##Function Details## -

sort_key/2

- +###sort_key/2## diff --git a/doc/ux_uca_sort_key_uncompressed.md b/doc/uca/ux_uca_sort_key_uncompressed.md similarity index 56% rename from doc/ux_uca_sort_key_uncompressed.md rename to doc/uca/ux_uca_sort_key_uncompressed.md index 405828c..07d0d63 100644 --- a/doc/ux_uca_sort_key_uncompressed.md +++ b/doc/uca/ux_uca_sort_key_uncompressed.md @@ -1,33 +1,27 @@ -Module ux_uca_sort_key_uncompressed -=================================== -

Module ux_uca_sort_key_uncompressed

- +#Module ux_uca_sort_key_uncompressed# * [Function Index](#index) * [Function Details](#functions) + - -

Function Index

- +##Function Index##
sort_key/2
+ - -

Function Details

- +##Function Details## -

sort_key/2

- +###sort_key/2## diff --git a/doc/uca/ux_uca_utils.md b/doc/uca/ux_uca_utils.md new file mode 100644 index 0000000..beb059f --- /dev/null +++ b/doc/uca/ux_uca_utils.md @@ -0,0 +1,94 @@ + + +#Module ux_uca_utils# +* [Function Index](#index) +* [Function Details](#functions) + + + + + + +##Function Index## + + +
do_alt/2
do_alt/3
do_extract/3
get_ducet/0
get_options/0
get_reassign_function/2
split_levels/3
+ + + + +##Function Details## + + + +###do_alt/2## + + + + +
do_alt(A::function(), W::binary() | integer()) -> [integer()]
+

+ + + + +###do_alt/3## + + + + +`do_alt(A, W, S) -> any()` + + + +###do_extract/3## + + + + +
do_extract(Uca_options::#uca_options{}, S::string(), D::function()) -> {integer(), string()}
+

+ + + + +###get_ducet/0## + + + + +
get_ducet() -> function()
+

+ + + + +###get_options/0## + + + + +
get_options() -> #uca_options{}
+

+ + + + +###get_reassign_function/2## + + + + +`get_reassign_function(D, L) -> any()` + + + +###split_levels/3## + + + + +
split_levels(L::integer(), B::boolean(), W::[[integer()]]) -> {[integer()], [[integer()]]}
+

+ + diff --git a/doc/unidata/package-summary.md b/doc/unidata/package-summary.md new file mode 100644 index 0000000..57294da --- /dev/null +++ b/doc/unidata/package-summary.md @@ -0,0 +1,3 @@ + + +#Package unidata# diff --git a/doc/utils/package-summary.md b/doc/utils/package-summary.md new file mode 100644 index 0000000..812ddc5 --- /dev/null +++ b/doc/utils/package-summary.md @@ -0,0 +1,3 @@ + + +#Package utils# diff --git a/doc/utils/ux_opt_ranges.md b/doc/utils/ux_opt_ranges.md new file mode 100644 index 0000000..efccb6c --- /dev/null +++ b/doc/utils/ux_opt_ranges.md @@ -0,0 +1,59 @@ + + +#Module ux_opt_ranges# +* [Description](#description) +* [Function Index](#index) +* [Function Details](#functions) + + +Functions for working with ranges in lists. + + + +##Description## + + + + +ETS is fast only as a key-value store. +But some data files contain ranges: From..To. +The fastest way is using lists for storing these values. + + + +There are two types of these lists: +* with booleans: `[{1,3}, 6, {8,9}]`. For example, `is_compat`; +* with values: `[{{1,3}, value1}, {{4,12}, value2}]`. + +`in_list` function is for the first type. +`search` function is for the second type. + + +##Function Index## + + +
in_list/1
search/2
+ + + + +##Function Details## + + + +###in_list/1## + + + + +`in_list(V) -> any()` + + + +###search/2## + + + + +`search(Def, V) -> any()` + diff --git a/doc/ux_range.md b/doc/utils/ux_ranges.md similarity index 62% rename from doc/ux_range.md rename to doc/utils/ux_ranges.md index a99a46f..8421b76 100644 --- a/doc/ux_range.md +++ b/doc/utils/ux_ranges.md @@ -1,23 +1,21 @@ -Module ux_range -=============== -

Module ux_range

- +#Module ux_ranges# * [Description](#description) * [Function Index](#index) * [Function Details](#functions) -ETS is fast only as a key-value store. +Functions for working with ranges in lists. + +##Description## -

Description

- +ETS is fast only as a key-value store. But some data files contains ranges: From..To. The fastest way is using lists for storing this values. @@ -29,41 +27,37 @@ There is two types of these lists: `in_list` function is for the first type. `search` function is for the second type. + - -

Function Index

- +##Function Index##
in_list/2
search/2
+ - -

Function Details

- +##Function Details## -

in_list/2

- +###in_list/2## -
in_list(T::[{integer(), integer()} | integer()], V::integer()) -> boolean()
+
in_list(T::[{integer(), integer()} | integer()], H::integer()) -> boolean()


-

search/2

- +###search/2## -
search(T::[{{integer(), integer()} | integer(), term()}], V::integer()) -> boolean()
+
search(T::[{{integer(), integer()} | integer(), term()}], H::integer()) -> boolean()


diff --git a/doc/ux_char.md b/doc/ux_char.md index 477fb6a..432718b 100644 --- a/doc/ux_char.md +++ b/doc/ux_char.md @@ -1,10 +1,8 @@ -Module ux_char -============== -

Module ux_char

- +#Module ux_char# * [Description](#description) +* [Data Types](#types) * [Function Index](#index) * [Function Details](#functions) @@ -16,59 +14,66 @@ Character functions. Copyright (c) 2010-2011 Michael Uvarov __Authors:__ Michael Uvarov ([`freeakk@gmail.com`](mailto:freeakk@gmail.com)). + -

Function Index

+##Data Types## -
block/1
comment/1
is_acsii/1
is_cjk_compatibility_ideograph/1
is_cjk_unified_ideograph/1
is_decimal/1Return true, if C is a decimal number.
is_hangul/1
is_hangul_precomposed/1
is_letter/1Returns true, if C is a letter.
is_lower/1
is_mark/1
is_number/1Returns true, if is C is a number.
is_punctuation_mark/1Returns true, if is C is a punctiation mark.
is_separator/1Return true, if is C is a separator.
is_unified_ideograph/1
is_upper/1
to_lower/1
to_ncr/1
to_upper/1
type/1
+###char_type()## -

Function Details

+
char_type() = ux_types:char_type()
+ +##Function Index## + + +
block/1
comment/1
is_acsii/1
is_cjk_compatibility_ideograph/1
is_cjk_unified_ideograph/1
is_decimal/1Return true, if C is a decimal number.
is_hangul/1
is_hangul_precomposed/1
is_letter/1Returns true, if C is a letter.
is_lower/1
is_mark/1
is_number/1Returns true if C is a number.
is_punctuation_mark/1Returns true if C is a punctuation mark.
is_separator/1Returns true if C is a separator.
is_unified_ideograph/1
is_upper/1
script/1
to_lower/1
to_ncr/1
to_upper/1
type/1
- -

block/1

+ +##Function Details## + + +###block/1## -
block(V::char) -> atom()
+ +
block(V::char) -> atom()


-

comment/1

- +###comment/1## -
comment(V::char()) -> binary()
+
comment(V::char()) -> binary()


-

is_acsii/1

+###is_acsii/1## - -
is_acsii(Char::char()) -> boolean()
+
is_acsii(Char::char()) -> boolean()


-

is_cjk_compatibility_ideograph/1

- +###is_cjk_compatibility_ideograph/1## @@ -77,8 +82,7 @@ __Authors:__ Michael Uvarov ([`freeakk@gmail.com`](mailto:freeakk@gmail.com)). -

is_cjk_unified_ideograph/1

- +###is_cjk_unified_ideograph/1## @@ -87,13 +91,12 @@ __Authors:__ Michael Uvarov ([`freeakk@gmail.com`](mailto:freeakk@gmail.com)). -

is_decimal/1

- +###is_decimal/1## -
is_decimal(C::char()) -> boolean()
+
is_decimal(C::char()) -> boolean()


@@ -101,8 +104,7 @@ __Authors:__ Michael Uvarov ([`freeakk@gmail.com`](mailto:freeakk@gmail.com)). Return true, if C is a decimal number. -

is_hangul/1

- +###is_hangul/1## @@ -111,8 +113,7 @@ Return true, if C is a decimal number. -

is_hangul_precomposed/1

- +###is_hangul_precomposed/1## @@ -121,13 +122,12 @@ Return true, if C is a decimal number. -

is_letter/1

+###is_letter/1## - -
is_letter(C::char()) -> boolean()
+
is_letter(C::char()) -> boolean()


@@ -135,20 +135,18 @@ Return true, if C is a decimal number. Returns true, if C is a letter. -

is_lower/1

- +###is_lower/1## -
is_lower(V::char()) -> boolean()
+
is_lower(V::char()) -> boolean()


-

is_mark/1

- +###is_mark/1## @@ -157,13 +155,12 @@ Returns true, if C is a letter. -

is_number/1

- +###is_number/1## -
is_number(C::char()) -> boolean()
+
is_number(C::char()) -> boolean()


@@ -171,13 +168,12 @@ Returns true, if C is a letter. Returns true, if is C is a number. -

is_punctuation_mark/1

+###is_punctuation_mark/1## - -
is_punctuation_mark(C::char()) -> boolean()
+
is_punctuation_mark(C::char()) -> boolean()


@@ -185,13 +181,12 @@ Returns true, if is C is a number. Returns true, if is C is a punctiation mark. -

is_separator/1

- +###is_separator/1## -
is_separator(C::char()) -> boolean()
+
is_separator(C::char()) -> boolean()


@@ -199,8 +194,7 @@ Returns true, if is C is a punctiation mark. Return true, if is C is a separator. -

is_unified_ideograph/1

- +###is_unified_ideograph/1## @@ -209,56 +203,62 @@ Return true, if is C is a separator. -

is_upper/1

+###is_upper/1## + + + +
is_upper(V::char()) -> boolean()
+

+ + + + +###script/1## -
is_upper(V::char()) -> boolean()
+
script(V::char) -> atom()


-

to_lower/1

+###to_lower/1## - -
to_lower(V::char()) -> char()
+
to_lower(V::char()) -> char()


-

to_ncr/1

- +###to_ncr/1## -
to_ncr(Char::char()) -> string()
+
to_ncr(Char::char()) -> string()


-

to_upper/1

+###to_upper/1## - -
to_upper(V::char()) -> char()
+
to_upper(V::char()) -> char()


-

type/1

- +###type/1## diff --git a/doc/ux_col.md b/doc/ux_col.md deleted file mode 100644 index 086c395..0000000 --- a/doc/ux_col.md +++ /dev/null @@ -1,333 +0,0 @@ -Module ux_col -============= - - -

Module ux_col

- -* [Description](#description) -* [Function Index](#index) -* [Function Details](#functions) - - -UCA. - - - -Copyright (c) 2010-2011 Michael Uvarov - -__Authors:__ Michael Uvarov ([`freeakk@gmail.com`](mailto:freeakk@gmail.com)). - -__See also:__ [ux](ux.md). - -

Description

- - - - - -

Additional information (and links)

- - - - - -1. [Hangul Collation Requirements](http://www.open-std.org/jtc1/sc22/wg20/docs/n1037-Hangul%20Collation%20Requirements.htm) -PS: There is the main source of information. - - - -2. [Terminator weight for Hangul](http://code.activestate.com/lists/perl-unicode/2163/) - - - -3. [Theory vs. practice for Korean text collation](http://blogs.msdn.com/b/michkap/archive/2005/02/25/380266.aspx) -PS: there is no any practice. They do not the UCA :/ - - - -4. [Wiki](http://en.wikipedia.org/wiki/Unicode_collation_algorithm) - - - -6. [Unicode implementer's guide part 3: Conjoining jamo behavior](http://useless-factor.blogspot.com/2007/08/unicode-implementers-guide-part-3.md) - - - -7. [Unicode implementer's guide part 5: Collation](http://useless-factor.blogspot.com/2007/10/unicode-implementers-guide-part-5.md) - - - -8. [Unicode collation works now](http://useless-factor.blogspot.com/2008/05/unicode-collation-works-now.md) -PS: I found it so late. :( - - - -9. [ICU](http://userguide.icu-project.org/collation/concepts) - -10. [String Sorting (Natural) in Erlang Cookbook](http://trapexit.org/String_Sorting_%28Natural%29) -
   FIXED: Combining character contractions. Apparently, two combining marks can
-form a contraction. A straight reading of the UCA wouldn't predict
-this, but not all of the UCA tests pass unless you check for
-non-adjacent combining marks being in a contraction together, without
-a noncombining mark to start it off.
- -

Function Index

- - - -
blanked/2Variable collation elements and any subsequent ignorables -are reset so that their weights at levels one through three are zero.
compare/2
compare/3
ducet/1In: not reversed string.
get_options/0
get_options/1
get_options/2If you want use this library without import *.hrl, you can create -a #uca_options {} record with this function.
non_ignorable/2Variable collation elements are not reset to be ignorable, but -get the weights explicitly mentioned in the file.
shift_trimmed/2This option is the same as Shifted, except that all trailing -FFFFs are trimmed from the sort key.
shifted/2Variable collation elements are reset to zero at levels one through -three.
sort/1
sort/2Sort a string list.
sort_array/1
sort_array/2
sort_array_blanked/1
sort_array_non_ignorable/1
sort_array_shift_trimmed/1
sort_array_shifted/1
sort_key/1
sort_key/2
- - - - -

Function Details

- - - - -

blanked/2

- - - - - -`blanked(S1, S2) -> any()` - - - -Variable collation elements and any subsequent ignorables -are reset so that their weights at levels one through three are zero. -For example, -
  * SPACE would have the value [.0000.0000.0000]
-* A combining grave accent after a space would have the value [.0000.0000.0000]
-* Capital A would be unchanged, with the value [.06D9.0020.0008]
-* A combining grave accent after a Capital A would be unchanged
- -

compare/2

- - - - - -`compare(String1, String2) -> any()` - - - -

compare/3

- - - - - -`compare(String1, String2, Uca_options) -> any()` - - - -

ducet/1

- - - - - -`ducet(A) -> any()` - - - -In: not reversed string. -Out: not reversed weight list. - -

get_options/0

- - - - - -`get_options() -> any()` - - - -

get_options/1

- - - - - -`get_options(Params) -> any()` - - - -

get_options/2

- - - - - -`get_options(T, Opt) -> any()` - - - -If you want use this library without import *.hrl, you can create -a #uca_options {} record with this function. - -

non_ignorable/2

- - - - - -`non_ignorable(S1, S2) -> any()` - - - -Variable collation elements are not reset to be ignorable, but -get the weights explicitly mentioned in the file. -
  * SPACE would have the value [.0209.0020.0002]
-* Capital A would be unchanged, with the value [.06D9.0020.0008]
-* Ignorables are unchanged.
- -

shift_trimmed/2

- - - - - -`shift_trimmed(S1, S2) -> any()` - - - -This option is the same as Shifted, except that all trailing -FFFFs are trimmed from the sort key. -This could be used to emulate POSIX behavior. - -

shifted/2

- - - - - -`shifted(S1, S2) -> any()` - - - -Variable collation elements are reset to zero at levels one through -three. In addition, a new fourth-level weight is appended, whose value -depends on the type, as shown in Table 12. -Any subsequent primary or secondary ignorables following a variable are reset -so that their weights at levels one through four are zero. -
  * A combining grave accent after a space would have the value
-[.0000.0000.0000.0000].
-* A combining grave accent after a Capital A would be unchanged.
- -

sort/1

- - - - - -`sort(Lists) -> any()` - - - -

sort/2

- - - - - -`sort(Lists, Alt) -> any()` - - - -Sort a string list. -Example: -
      f().
-RawData = ["death", "de luge", "de-luge", "deluge", "de-luge", "de Luge", "de-Luge", "deLuge", "de-Luge", "demark"].
-Data = lists:map(fun lists:flatten/1, RawData).
-ux_string:sort(Data, non_ignorable).
-ux_string:sort(Data, blanked).
-ux_string:sort(Data, shifted).
-ux_string:sort(Data, shift_trimmed).
- -

sort_array/1

- - - - - -`sort_array(Str) -> any()` - - - -

sort_array/2

- - - - - -`sort_array(Str, Params) -> any()` - - - -

sort_array_blanked/1

- - - - - -`sort_array_blanked(Str) -> any()` - - - -

sort_array_non_ignorable/1

- - - - - -`sort_array_non_ignorable(Str) -> any()` - - - -

sort_array_shift_trimmed/1

- - - - - -`sort_array_shift_trimmed(Str) -> any()` - - - -

sort_array_shifted/1

- - - - - -`sort_array_shifted(Str) -> any()` - - - -

sort_key/1

- - - - - -`sort_key(Str) -> any()` - - - -

sort_key/2

- - - - - -`sort_key(Str, Uca_options) -> any()` - diff --git a/doc/ux_gb.md b/doc/ux_gb.md new file mode 100644 index 0000000..08850c2 --- /dev/null +++ b/doc/ux_gb.md @@ -0,0 +1,37 @@ + + +#Module ux_gb# +* [Description](#description) +* [Function Index](#index) +* [Function Details](#functions) + + +Default Grapheme Cluster Boundary Breaker. + + + +##Description## + + +[UTR29: Grapheme Cluster Boundaries] +(http://unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries) + +##Function Index## + + +
split/2
+ + + + +##Function Details## + + + +###split/2## + + + + +`split(T, S) -> any()` + diff --git a/doc/ux_math.md b/doc/ux_math.md deleted file mode 100644 index 83c1df9..0000000 --- a/doc/ux_math.md +++ /dev/null @@ -1,82 +0,0 @@ -Module ux_math -============== - - -

Module ux_math

- -* [Function Index](#index) -* [Function Details](#functions) - - - - - - -Copyright (c) 2010-2011 Michael Uvarov - -__Authors:__ Michael Uvarov ([`freeakk@gmail.com`](mailto:freeakk@gmail.com)). - -__See also:__ [ux](ux.md). - -

Function Index

- - - -
average/1
factorial/1
stdev/1
stdev_filter_miss/1
variance/1
- - - - -

Function Details

- - - - -

average/1

- - - - - -`average(Tail) -> any()` - - - -

factorial/1

- - - - - -`factorial(N) -> any()` - - - -

stdev/1

- - - - - -`stdev(X) -> any()` - - - -

stdev_filter_miss/1

- - - - - -`stdev_filter_miss(Val) -> any()` - - - -

variance/1

- - - - - -`variance(X) -> any()` - diff --git a/doc/ux_par.md b/doc/ux_par.md deleted file mode 100644 index 918dbe0..0000000 --- a/doc/ux_par.md +++ /dev/null @@ -1,83 +0,0 @@ -Module ux_par -============= - - -

Module ux_par

- -* [Description](#description) -* [Function Index](#index) -* [Function Details](#functions) - - -This Module contains functions for working with Req:parse_post() list -from the mochiweb library. - - - -

Description

- - - -Example: -[Extraction of params from a POST data list](https://github.com/freeakk/web_col). -
   col_params(PostList) ->
-V = ux_col:get_options([
-{natural_sort, ux_par:atom("natural_sort", PostList)},
-{case_sensitive, ux_par:atom("case_sensitive", PostList)},
-{strength, ux_par:integer("strength", PostList)},
-{alternate, ux_par:atom("alternate", PostList)},
-{case_first, ux_par:atom("case_first", PostList)}
-]).'
- -

Function Index

- - - -
atom/2
el/2
integer/2
string/2
- - - - -

Function Details

- - - - -

atom/2

- - - - - -`atom(Name, List) -> any()` - - - -

el/2

- - - - - -`el(Name, List) -> any()` - - - -

integer/2

- - - - - -`integer(Name, List) -> any()` - - - -

string/2

- - - - - -`string(Name, List) -> any()` - diff --git a/doc/ux_ranges.md b/doc/ux_ranges.md deleted file mode 100644 index 2216e17..0000000 --- a/doc/ux_ranges.md +++ /dev/null @@ -1,50 +0,0 @@ -Module ux_ranges -================ - - -

Module ux_ranges

- -* [Function Index](#index) -* [Function Details](#functions) - - - - - - -

Function Index

- - - -
in_list/2
search/2
- - - - -

Function Details

- - - - -

in_list/2

- - - - - -
in_list(T::[{integer(), integer()} | integer()], V::integer()) -> boolean()
-

- - - - -

search/2

- - - - - -
search(T::[{{integer(), integer()} | integer(), term()}], V::integer()) -> boolean()
-

- - diff --git a/doc/ux_string.md b/doc/ux_string.md index d22ed97..6b6ba39 100644 --- a/doc/ux_string.md +++ b/doc/ux_string.md @@ -1,10 +1,8 @@ -Module ux_string -================ -

Module ux_string

- +#Module ux_string# * [Description](#description) +* [Data Types](#types) * [Function Index](#index) * [Function Details](#functions) @@ -16,31 +14,41 @@ String functions. Copyright (c) 2010-2011 Michael Uvarov __Authors:__ Michael Uvarov ([`freeakk@gmail.com`](mailto:freeakk@gmail.com)). + + +##Data Types## + + + + +###char_type()## + -

Function Index

+
char_type() = ux_types:char_type()
+ + +##Function Index##
delete_types/2Returns a new string which is made from the chars of Str which are not a type from Types list.
delete_types/3Stops delete_type/2 after Limit deleted chars.
explode/2Splits the string by delimeters.
explode/3
explode_types/2Returns a new list of strings which are parts of Str splited -by separator chars of a type from Types list.
filter_types/2Returns a new string which is made from the chars of Str +by separator chars of a type from Types list.
extract_words/1
filter_types/2Returns a new string which is made from the chars of Str which are a type from Types list.
filter_types/3Stops after -Limit skipped chars.
first/2Return Len chars from the beginning of the string.
first_types/3If Len<0, then gets first Len chars of type, which is in Types -If Len>0, then gets first -Len chars of type, which is NOT in Types.
freq/1Counts how many identical chars in the string.
html_special_chars/1Encodes html special chars.
info/1Return information about a string.
is_nfc/1
is_nfd/1
is_nfkc/1
is_nfkd/1
last/2Return Len chars from the beginning of the string.
last_types/3If Len<0, then gets last Len chars of type, which is in Types -If Len>0, then gets last -Len chars of type, which is NOT in Types.
length/1Compute count of graphemes in the string.
reverse/1Reverses the string graphemes.
split/2
split/3
split_types/2Returns a new list of strings which are parts of Str splited -by separator chars of a type from Types list.
strip_tags/1Deletes tags from the string.
strip_tags/2
strip_tags/3
to_graphemes/1Split unicode string into +If Len>0, then gets first -Len chars of type, which is NOT in Types.
freq/1Counts how many identical chars in the string.
is_nfc/1
is_nfd/1
is_nfkc/1
is_nfkd/1
last/2Return Len chars from the beginning of the string.
last_types/3If Len<0, then gets last Len chars of type, which is in Types +If Len>0, then gets last -Len chars of type, which is NOT in Types.
length/1Compute count of graphemes in the string.
reverse/1Reverses the string graphemes.
script/1
scripts/1
split/2
split/3
split_types/2Returns a new list of strings which are parts of Str splited +by separator chars of a type from Types list.
to_graphemes/1Split unicode string into [graphemes](http://en.wikipedia.org/wiki/Grapheme).
to_lower/1Converts characters of a string to a lowercase format.
to_ncr/1Convert everything from utf-8 into an NCR (Numeric Character Reference).
to_nfc/1
to_nfd/1
to_nfkc/1
to_nfkd/1
to_string/1Converts something to string (list).
to_upper/1Converts characters of a string to a uppercase format.
types/1Returns various "character types" which can be used as a default categorization in implementations.
+ - -

Function Details

- +##Function Details## -

delete_types/2

- +###delete_types/2## @@ -54,8 +62,7 @@ as a default categorization in implementations. Returns a new string which is made from the chars of Str which are not a type from Types list. -

delete_types/3

- +###delete_types/3## @@ -69,13 +76,12 @@ which are not a type from Types list. Stops delete_type/2 after Limit deleted chars. If Limit < 0, then stops after -Limit skipped chars. -

explode/2

- +###explode/2## -
explode(Delimeter::[string()], Str::string()) -> [string()]
+
explode(Delimeter::[nonempty_string()], Str::string()) -> [string()]


@@ -83,20 +89,18 @@ stops after -Limit skipped chars. Splits the string by delimeters. -

explode/3

+###explode/3## - -
explode(Delimeter::[string()], Str::string(), Limit::integer()) -> string()
+
explode(Delimeter::[nonempty_string()], Str::string(), Limit::integer()) -> string()


-

explode_types/2

- +###explode_types/2## @@ -108,10 +112,18 @@ Splits the string by delimeters. Returns a new list of strings which are parts of Str splited -by separator chars of a type from Types list. +by separator chars of a type from Types list. + +###extract_words/1## + + + + +`extract_words(S) -> any()` -

filter_types/2

+ +###filter_types/2## @@ -125,8 +137,7 @@ by separator chars of a type from Types list. Returns a new string which is made from the chars of Str which are a type from Types list. -

filter_types/3

- +###filter_types/3## @@ -139,8 +150,7 @@ which are a type from Types list. Stops after -Limit skipped chars. -

first/2

- +###first/2## @@ -151,8 +161,7 @@ Stops after -Limit skipped chars. Return Len chars from the beginning of the string. -

first_types/3

- +###first_types/3## @@ -166,13 +175,12 @@ Return Len chars from the beginning of the string. If Len<0, then gets first Len chars of type, which is in Types If Len>0, then gets first -Len chars of type, which is NOT in Types -

freq/1

+###freq/1## - -
freq(Str::string()) -> dict()
+
freq(Str::string()) -> dict()


@@ -183,86 +191,53 @@ Returns a dict. Example:
  >dict:to_list(ux_string:freq("FFDF")).
   [{70,3},{68,1}]
- - -

html_special_chars/1

+ +###is_nfc/1## -
html_special_chars(Str::string()) -> string()
-

- - - - -Encodes html special chars. - -

info/1

- - - - - -
info(Str::string()) -> #unistr_info{}
-

- - - - -Return information about a string. - -

is_nfc/1

- - - - - -
is_nfc(Str::list()) -> yes | no | maybe
+
is_nfc(Str::list()) -> yes | no | maybe


-

is_nfd/1

- +###is_nfd/1## -
is_nfd(Str::list()) -> yes | no | maybe
+
is_nfd(Str::list()) -> yes | no | maybe


-

is_nfkc/1

+###is_nfkc/1## - -
is_nfkc(Str::list()) -> yes | no | maybe
+
is_nfkc(Str::list()) -> yes | no | maybe


-

is_nfkd/1

- +###is_nfkd/1## -
is_nfkd(Str::list()) -> yes | no | maybe
+
is_nfkd(Str::list()) -> yes | no | maybe


-

last/2

- +###last/2## @@ -273,8 +248,7 @@ Return information about a string. Return Len chars from the beginning of the string. -

last_types/3

- +###last_types/3## @@ -288,20 +262,18 @@ Return Len chars from the beginning of the string. If Len<0, then gets last Len chars of type, which is in Types If Len>0, then gets last -Len chars of type, which is NOT in Types -

length/1

- +###length/1## -`length(Str) -> any()` +`length(S) -> any()` Compute count of graphemes in the string. -

reverse/1

- +###reverse/1## @@ -310,96 +282,60 @@ Compute count of graphemes in the string. -Reverses the string graphemes. - -

split/2

- - - - - -`split(P1, P2) -> any()` - - - -

split/3

- +Reverses the string graphemes. +###script/1## -`split(P1, P2, P3) -> any()` - +`script(S) -> any()` -

split_types/2

+ +###scripts/1## -
split_types(Types::[char_type()], Str::string()) -> string()
-

+`scripts(S) -> any()` + +###split/2## -Returns a new list of strings which are parts of Str splited -by separator chars of a type from Types list. Parts can not be -empty. -

strip_tags/1

- - - - - -
strip_tags(Str::string()) -> string()
-

+`split(P1, P2) -> any()` + +###split/3## -Deletes tags from the string. -Example: -
       > ux_string:strip_tags("some string").
-       "some string"
-       > ux_string:strip_tags("

Head

and paragraf

", ["h1"]). - "

Head

and paragraf" - ux_string:strip_tags("

Head

", ["!--"]). - "Head" - ux_string:st("a
b", [], " "). - "a b"
+`split(P1, P2, P3) -> any()` -

strip_tags/2

+ +###split_types/2## -
strip_tags(Str::string, Allowed::[string() | atom() | char()]) -> string()
+
split_types(Types::[char_type()], Str::string()) -> string()


- - -

strip_tags/3

- - - - - -
strip_tags(Str::string, Allowed::[string() | atom() | char()], Alt::string()) -> string()
-

- - -

to_graphemes/1

+Returns a new list of strings which are parts of Str splited +by separator chars of a type from Types list. Parts can not be +empty. +###to_graphemes/1## @@ -428,13 +364,12 @@ by two Unicode code points. These user-perceived characters are approximated by what is called a grapheme cluster, which can be determined programmatically. -

to_lower/1

+###to_lower/1## - -
to_lower(Str::string()) -> string()
+
to_lower(Str::string()) -> string()


@@ -442,8 +377,7 @@ determined programmatically. Converts characters of a string to a lowercase format. -

to_ncr/1

- +###to_ncr/1## @@ -454,61 +388,56 @@ Converts characters of a string to a lowercase format. Convert everything from utf-8 into an NCR (Numeric Character Reference). -

to_nfc/1

- +###to_nfc/1## -
to_nfc(Str::list()) -> list()
+
to_nfc(Str::list()) -> list()


-

to_nfd/1

+###to_nfd/1## - -
to_nfd(Str::list()) -> list()
+
to_nfd(Str::list()) -> list()


-

to_nfkc/1

- +###to_nfkc/1## -
to_nfkc(Str::list()) -> list()
+
to_nfkc(Str::list()) -> list()


-

to_nfkd/1

- +###to_nfkd/1## -
to_nfkd(Str::list()) -> list()
+
to_nfkd(Str::list()) -> list()


-

to_string/1

+###to_string/1## - -
to_string(Str::string() | atom() | integer()) -> string()
+
to_string(Str::string() | atom() | integer()) -> string()


@@ -516,13 +445,12 @@ Convert everything from utf-8 into an NCR (Numeric Character Reference). -

to_upper/1

- +###to_upper/1## -
to_upper(Str::string()) -> string()
+
to_upper(Str::string()) -> string()


@@ -530,8 +458,7 @@ Converts something to string (list). Converts characters of a string to a uppercase format. -

types/1

- +###types/1## diff --git a/doc/ux_types.md b/doc/ux_types.md new file mode 100644 index 0000000..ec55334 --- /dev/null +++ b/doc/ux_types.md @@ -0,0 +1,29 @@ + + +#Module ux_types# +* [Data Types](#types) + + + + + + + +##Data Types## + + + + +###char_type()## + + + +
char_type() = lu | ll | lt | mn | mc | me | nd | nl | no | zs | zl | zp | cc | cf | cs | co | cn | lm | lo | pc | pd | ps | pe | pi | pf | po | sm | sc | sk | so | other
+ + + +###ux_ccc()## + + + +
ux_ccc() = 0..240
diff --git a/doc/ux_uca.md b/doc/ux_uca.md index 62df95e..e67952c 100644 --- a/doc/ux_uca.md +++ b/doc/ux_uca.md @@ -1,9 +1,6 @@ -Module ux_uca -============= -

Module ux_uca

- +#Module ux_uca# * [Description](#description) * [Data Types](#types) * [Function Index](#index) @@ -17,16 +14,14 @@ see Unicode Technical Standard #10. Copyright (c) 2010-2011 Michael Uvarov -__Authors:__ Michael Uvarov ([`freeakk@gmail.com`](mailto:freeakk@gmail.com)). - -

Description

- +__Authors:__ Michael Uvarov ([`freeakk@gmail.com`](mailto:freeakk@gmail.com)). +##Description## -

Additional information (and links)

+###Additional information (and links)## @@ -79,8 +74,7 @@ For hangul collation: -

Levels

- +###Levels## @@ -100,14 +94,12 @@ Example using levels: -

Common configurations

- +###Common configurations## -

Non-ignorable

- +####Non-ignorable## @@ -127,8 +119,7 @@ Example: -

Blanked

- +####Blanked## @@ -151,8 +142,7 @@ Example: -

Shifted

- +####Shifted## @@ -175,8 +165,7 @@ Example: -

Shift-trimmed

- +####Shift-trimmed## @@ -189,16 +178,22 @@ Example: ux_uca:sort_key(C, "Shift-trimmed collation sort key"). + +##Data Types## -

Data Types

+###result()## -

search_result()

+
result() = {[uca_elem()], string()}
+ + + +###search_result()## @@ -206,33 +201,93 @@ Example: -

uca_compare_result()

+###uca_alternate()## + + + +
uca_alternate() = shifted | shift_trimmed | non_ignorable | blanked
+ + + +###uca_array()## +
uca_array() = [uca_elem()]
+ + + +###uca_case_first()## + + + +
uca_case_first() = lower | upper | off
+ + + +###uca_compare_result()## + +
uca_compare_result() = lower | greater | equal
-

Function Index

+###uca_elem()## -
compare/2Compare two strings and return: lower, greater or equal.
compare/3
search/2
search/3
search/4
sort/1Sort a list of strings.
sort/2Sort a list of strings.
sort_array/1Convert the unicode string to the -collation element array
sort_array/2
sort_key/1Convert the unicode string to the sort key.
sort_key/2
+
uca_elem() = [atom() | uca_weight()]
-

Function Details

+###uca_sort_key_format()## + + + +
uca_sort_key_format() = binary | list | uncompressed
- -

compare/2

+###uca_strength()## +
uca_strength() = 1 | 2 | 3 | 4
+ + + +###uca_weight()## + + + +
uca_weight() = integer()
+ + + +###uca_weights()## + + + +
uca_weights() = [uca_weight()]
+ + +##Function Index## + + +
compare/2Compare two strings and return: lower, greater or equal.
compare/3
search/2
search/3
search/4
sort/1Sort a list of strings.
sort/2Sort a list of strings.
sort_array/1Convert the unicode string to the +collation element array
sort_array/2
sort_key/1Convert the unicode string to the sort key.
sort_key/2
+ + + + +##Function Details## + + + +###compare/2## + +
compare(S1::string(), S2::string()) -> uca_compare_result()
@@ -243,8 +298,7 @@ Example: Compare two strings and return: lower, greater or equal. -

compare/3

- +###compare/3## @@ -255,8 +309,7 @@ Compare two strings and return: lower, greater or equal. -

search/2

- +###search/2## @@ -267,8 +320,7 @@ Compare two strings and return: lower, greater or equal. -

search/3

- +###search/3## @@ -279,8 +331,7 @@ Compare two strings and return: lower, greater or equal. -

search/4

- +###search/4## @@ -291,13 +342,12 @@ Compare two strings and return: lower, greater or equal. -

sort/1

+###sort/1## - -
sort(Strings::[string()]) -> [string()]
+
sort(Strings::[string()]) -> [string()]


@@ -305,13 +355,12 @@ Compare two strings and return: lower, greater or equal. Sort a list of strings. -

sort/2

- +###sort/2## -
sort(Uca_options::#uca_options{}, Strings::[string()]) -> [string()]
+
sort(Uca_options::#uca_options{}, Strings::[string()]) -> [string()]


@@ -319,8 +368,7 @@ Sort a list of strings. Sort a list of strings. -

sort_array/1

- +###sort_array/1## @@ -332,8 +380,7 @@ Sort a list of strings. Convert the unicode string to the [collation element array](http://unicode.org/reports/tr10/#Step_2) -

sort_array/2

- +###sort_array/2## @@ -342,8 +389,7 @@ Convert the unicode string to the -

sort_key/1

- +###sort_key/1## @@ -354,8 +400,7 @@ Convert the unicode string to the Convert the unicode string to the sort key. -

sort_key/2

- +###sort_key/2## diff --git a/doc/ux_uca_alt.md b/doc/ux_uca_alt.md deleted file mode 100644 index 932f910..0000000 --- a/doc/ux_uca_alt.md +++ /dev/null @@ -1,10 +0,0 @@ -Module ux_uca_alt -================= - - -

Module ux_uca_alt

- - - - - diff --git a/doc/ux_uca_extract.md b/doc/ux_uca_extract.md deleted file mode 100644 index 69a933a..0000000 --- a/doc/ux_uca_extract.md +++ /dev/null @@ -1,10 +0,0 @@ -Module ux_uca_extract -===================== - - -

Module ux_uca_extract

- - - - - diff --git a/doc/ux_uca_tests.md b/doc/ux_uca_tests.md deleted file mode 100644 index b20f8e9..0000000 --- a/doc/ux_uca_tests.md +++ /dev/null @@ -1,10 +0,0 @@ -Module ux_uca_tests -=================== - - -

Module ux_uca_tests

- - - - - diff --git a/doc/ux_unidata_filelist.md b/doc/ux_unidata_filelist.md deleted file mode 100644 index 110f02c..0000000 --- a/doc/ux_unidata_filelist.md +++ /dev/null @@ -1,156 +0,0 @@ -Module ux_unidata_filelist -========================== - - -

Module ux_unidata_filelist

- -* [Function Index](#index) -* [Function Details](#functions) - - - - - - -

Function Index

- - - -
get_source/1
get_source/1
get_source/2
get_source_from/2
handle_call/3
handle_info/2
init/1
reg_pid/2
reg_pid/2
set_source/2
set_source/4
start_link/0
terminate/2
- - - - -

Function Details

- - - - -

get_source/1

- - - - - -`get_source(Value) -> any()` - - - -

get_source/1

- - - - - -`get_source(Value) -> any()` - - - -

get_source/2

- - - - - -`get_source(Parser, Type) -> any()` - - - -

get_source_from/2

- - - - - -`get_source_from(X1, Value) -> any()` - - - -

handle_call/3

- - - - - -`handle_call(X1, From, LoopData) -> any()` - - - -

handle_info/2

- - - - - -`handle_info(X1, LoopData) -> any()` - - - -

init/1

- - - - - -`init(X1) -> any()` - - - -

reg_pid/2

- - - - - -`reg_pid(Key, StoreServerPid) -> any()` - - - -

reg_pid/2

- - - - - -`reg_pid(Key, StoreServerPid) -> any()` - - - -

set_source/2

- - - - - -`set_source(Level, Key) -> any()` - - - -

set_source/4

- - - - - -`set_source(Level, Parser, Types, FileName) -> any()` - - - -

start_link/0

- - - - - -`start_link() -> any()` - - - -

terminate/2

- - - - - -`terminate(Reason, LoopData) -> any()` - diff --git a/doc/ux_unidata_parser.md b/doc/ux_unidata_parser.md deleted file mode 100644 index 6d91665..0000000 --- a/doc/ux_unidata_parser.md +++ /dev/null @@ -1,146 +0,0 @@ -Module ux_unidata_parser -======================== - - -

Module ux_unidata_parser

- -* [Function Index](#index) -* [Function Details](#functions) - - - - - - -

Function Index

- - - -
bool_fun/1
check/1
check_types/2
delete_spaces/1
ets_fun/2
expand_fun/2
expand_table/1
from_hex/1
get_functions/2
hex_to_int/1
run/1
split/2
- - - - -

Function Details

- - - - -

bool_fun/1

- - - - - -`bool_fun(Table) -> any()` - - - -

check/1

- - - - - -`check(X1) -> any()` - - - -

check_types/2

- - - - - -`check_types(D, A) -> any()` - - - -

delete_spaces/1

- - - - - -`delete_spaces(Str) -> any()` - - - -

ets_fun/2

- - - - - -`ets_fun(Table, DefaultValue) -> any()` - - - -

expand_fun/2

- - - - - -`expand_fun(Table, DefaultValue) -> any()` - - - -

expand_table/1

- - - - - -`expand_table(Table) -> any()` - - - -

from_hex/1

- - - - - -`from_hex(Str) -> any()` - - - -

get_functions/2

- - - - - -`get_functions(FileType, EtsTables) -> any()` - - - -

hex_to_int/1

- - - - - -`hex_to_int(Code) -> any()` - - - -

run/1

- - - - - -`run(X1) -> any()` - - - -

split/2

- - - - - -`split(Char, Str) -> any()` - diff --git a/doc/ux_unidata_parser_allkeys.md b/doc/ux_unidata_parser_allkeys.md deleted file mode 100644 index 0ae6e8e..0000000 --- a/doc/ux_unidata_parser_allkeys.md +++ /dev/null @@ -1,56 +0,0 @@ -Module ux_unidata_parser_allkeys -================================ - - -

Module ux_unidata_parser_allkeys

- -* [Function Index](#index) -* [Function Details](#functions) - - - - - - -

Function Index

- - - -
after_parse/1
get_function/2
parse/1
- - - - -

Function Details

- - - - -

after_parse/1

- - - - - -`after_parse(Ets) -> any()` - - - -

get_function/2

- - - - - -`get_function(X1, Table) -> any()` - - - -

parse/1

- - - - - -`parse(In) -> any()` - diff --git a/doc/ux_unidata_parser_blocks.md b/doc/ux_unidata_parser_blocks.md deleted file mode 100644 index 4b334ea..0000000 --- a/doc/ux_unidata_parser_blocks.md +++ /dev/null @@ -1,56 +0,0 @@ -Module ux_unidata_parser_blocks -=============================== - - -

Module ux_unidata_parser_blocks

- -* [Function Index](#index) -* [Function Details](#functions) - - - - - - -

Function Index

- - - -
get_function/2
parse/1
types/0
- - - - -

Function Details

- - - - -

get_function/2

- - - - - -`get_function(X1, Table) -> any()` - - - -

parse/1

- - - - - -`parse(In) -> any()` - - - -

types/0

- - - - - -`types() -> any()` - diff --git a/doc/ux_unidata_parser_comp_exclusions.md b/doc/ux_unidata_parser_comp_exclusions.md deleted file mode 100644 index e9b372c..0000000 --- a/doc/ux_unidata_parser_comp_exclusions.md +++ /dev/null @@ -1,56 +0,0 @@ -Module ux_unidata_parser_comp_exclusions -======================================== - - -

Module ux_unidata_parser_comp_exclusions

- -* [Function Index](#index) -* [Function Details](#functions) - - - - - - -

Function Index

- - - -
get_function/2
parse/1
types/0
- - - - -

Function Details

- - - - -

get_function/2

- - - - - -`get_function(X1, Table) -> any()` - - - -

parse/1

- - - - - -`parse(In) -> any()` - - - -

types/0

- - - - - -`types() -> any()` - diff --git a/doc/ux_unidata_parser_norm_props.md b/doc/ux_unidata_parser_norm_props.md deleted file mode 100644 index 750fa94..0000000 --- a/doc/ux_unidata_parser_norm_props.md +++ /dev/null @@ -1,56 +0,0 @@ -Module ux_unidata_parser_norm_props -=================================== - - -

Module ux_unidata_parser_norm_props

- -* [Function Index](#index) -* [Function Details](#functions) - - - - - - -

Function Index

- - - -
get_function/2
parse/1
types/0
- - - - -

Function Details

- - - - -

get_function/2

- - - - - -`get_function(Type, Table) -> any()` - - - -

parse/1

- - - - - -`parse(In) -> any()` - - - -

types/0

- - - - - -`types() -> any()` - diff --git a/doc/ux_unidata_parser_unidata.md b/doc/ux_unidata_parser_unidata.md deleted file mode 100644 index 564a763..0000000 --- a/doc/ux_unidata_parser_unidata.md +++ /dev/null @@ -1,56 +0,0 @@ -Module ux_unidata_parser_unidata -================================ - - -

Module ux_unidata_parser_unidata

- -* [Function Index](#index) -* [Function Details](#functions) - - - - - - -

Function Index

- - - -
get_function/2
parse/1
types/0
- - - - -

Function Details

- - - - -

get_function/2

- - - - - -`get_function(X1, Table) -> any()` - - - -

parse/1

- - - - - -`parse(In) -> any()` - - - -

types/0

- - - - - -`types() -> any()` - diff --git a/doc/ux_unidata_server.md b/doc/ux_unidata_server.md deleted file mode 100644 index 6988c26..0000000 --- a/doc/ux_unidata_server.md +++ /dev/null @@ -1,116 +0,0 @@ -Module ux_unidata_server -======================== - - -

Module ux_unidata_server

- -* [Function Index](#index) -* [Function Details](#functions) - - - - - - -

Function Index

- - - -
get_default/1
handle_call/3
handle_cast/2
handle_info/2
init/1
set_default/1
spawn_waiter/1
start_link/0
terminate/2
- - - - -

Function Details

- - - - -

get_default/1

- - - - - -`get_default(Key) -> any()` - - - -

handle_call/3

- - - - - -`handle_call(X1, From, LoopData) -> any()` - - - -

handle_cast/2

- - - - - -`handle_cast(X1, LoopData) -> any()` - - - -

handle_info/2

- - - - - -`handle_info(X1, LoopData) -> any()` - - - -

init/1

- - - - - -`init(X1) -> any()` - - - -

set_default/1

- - - - - -`set_default(Key) -> any()` - - - -

spawn_waiter/1

- - - - - -`spawn_waiter(Key) -> any()` - - - -

start_link/0

- - - - - -`start_link() -> any()` - - - -

terminate/2

- - - - - -`terminate(Reason, LoopData) -> any()` - diff --git a/doc/ux_unidata_store.md b/doc/ux_unidata_store.md deleted file mode 100644 index 83d057b..0000000 --- a/doc/ux_unidata_store.md +++ /dev/null @@ -1,136 +0,0 @@ -Module ux_unidata_store -======================= - - -

Module ux_unidata_store

- -* [Function Index](#index) -* [Function Details](#functions) - - - - - - -

Function Index

- - - -
check_types/2
get_funs/2
handle_call/3
handle_cast/2
handle_info/2
init/1
monitor_client_process/2
remove_type/2
start_link/2
table_list/1
terminate/2
- - - - -

Function Details

- - - - -

check_types/2

- - - - - -`check_types(ServerPid, Types) -> any()` - - - -

get_funs/2

- - - - - -`get_funs(ServerPid, Types) -> any()` - - - -

handle_call/3

- - - - - -`handle_call(X1, From, State) -> any()` - - - -

handle_cast/2

- - - - - -`handle_cast(X1, State) -> any()` - - - -

handle_info/2

- - - - - -`handle_info(X1, State) -> any()` - - - -

init/1

- - - - - -`init(X1) -> any()` - - - -

monitor_client_process/2

- - - - - -`monitor_client_process(ServerPid, ClientPid) -> any()` - - - -

remove_type/2

- - - - - -`remove_type(ServerPid, Type) -> any()` - - - -

start_link/2

- - - - - -`start_link(File, ClientPid) -> any()` - - - -

table_list/1

- - - - - -`table_list(ServerPid) -> any()` - - - -

terminate/2

- - - - - -`terminate(Reason, LoopData) -> any()` - diff --git a/doc/ux_unidata_store_sup.md b/doc/ux_unidata_store_sup.md deleted file mode 100644 index 6a0fe3f..0000000 --- a/doc/ux_unidata_store_sup.md +++ /dev/null @@ -1,58 +0,0 @@ -Module ux_unidata_store_sup -=========================== - - -

Module ux_unidata_store_sup

- -* [Function Index](#index) -* [Function Details](#functions) - - - - - - -

Function Index

- - - -
init/1
read_file/2Read file with UNIDATA.
start_link/0
- - - - -

Function Details

- - - - -

init/1

- - - - - -`init(X1) -> any()` - - - -

read_file/2

- - - - - -`read_file(Filename, ClientPid) -> any()` - - - -Read file with UNIDATA. - -

start_link/0

- - - - - -`start_link() -> any()` - diff --git a/doc/ux_utils.md b/doc/ux_utils.md deleted file mode 100644 index 88f9909..0000000 --- a/doc/ux_utils.md +++ /dev/null @@ -1,56 +0,0 @@ -Module ux_utils -=============== - - -

Module ux_utils

- -* [Function Index](#index) -* [Function Details](#functions) - - - - - - -

Function Index

- - - -
is_always_false/1
is_always_true/1
noop/1
- - - - -

Function Details

- - - - -

is_always_false/1

- - - - - -`is_always_false(X1) -> any()` - - - -

is_always_true/1

- - - - - -`is_always_true(X1) -> any()` - - - -

noop/1

- - - - - -`noop(X) -> any()` - diff --git a/doc/ux_wb.md b/doc/ux_wb.md new file mode 100644 index 0000000..46b33bd --- /dev/null +++ b/doc/ux_wb.md @@ -0,0 +1,46 @@ + + +#Module ux_wb# +* [Description](#description) +* [Function Index](#index) +* [Function Details](#functions) + + +Default Word Cluster Boundary Breaker. + + + +##Description## + + +[UTR29: Word Cluster Boundaries] +(http://unicode.org/reports/tr29/#Word_Cluster_Boundaries) + +##Function Index## + + +
split/1
words/1
+ + + + +##Function Details## + + + +###split/1## + + + + +`split(S) -> any()` + + + +###words/1## + + + + +`words(S) -> any()` + diff --git a/ebin/ux.app b/ebin/ux.app index 1c69a14..eb52615 100644 --- a/ebin/ux.app +++ b/ebin/ux.app @@ -1,13 +1,12 @@ {application,ux, [{description,"ux"}, {vsn,"3.0.1"}, - {modules,[ux,ux_app,ux_break,ux_break_tests,ux_char, - ux_char_tests,ux_deps,ux_gb,ux_html,ux_opt_ranges, - ux_ranges,ux_string,ux_string_tests,ux_sup,ux_uca, - ux_uca_alt,ux_uca_compress,ux_uca_extract, + {modules,[ux,ux_app,ux_break,ux_char,ux_deps,ux_gb,ux_html, + ux_opt_ranges,ux_ranges,ux_string,ux_sup,ux_types, + ux_uca,ux_uca_alt,ux_uca_compress,ux_uca_extract, ux_uca_options,ux_uca_sort_key_binary, ux_uca_sort_key_binary_cs,ux_uca_sort_key_list, - ux_uca_sort_key_uncompressed,ux_uca_tests,ux_unidata, + ux_uca_sort_key_uncompressed,ux_uca_utils,ux_unidata, ux_unidata_filelist,ux_unidata_parser, ux_unidata_parser_allkeys,ux_unidata_parser_blocks, ux_unidata_parser_comp_exclusions, diff --git a/rebar.config b/rebar.config index c8739c2..505cfab 100644 --- a/rebar.config +++ b/rebar.config @@ -4,14 +4,15 @@ {d, 'SLOW_TESTS'} ]}. {deps, [ +% We needs the reloader from Mochiweb +% {mochiweb, ".*", +% {git, "git://github.com/mochi/mochiweb.git", "master"}}, +% {edown, ".*", {git, "git://github.com/esl/edown.git", "HEAD"}}, + {abnfc, ".*", {git, "https://github.com/nygge/abnfc.git", "master"}}, {metamodule, ".*", {git, "https://github.com/freeakk/metamodule", "master"}} -% We needs the reloader from Mochiweb -% {mochiweb, ".*", -% {git, "git://github.com/mochi/mochiweb.git", "master"}}, -% {edown, ".*", {git, "git://github.com/esl/edown.git", "HEAD"}} ]}. {cover_enabled, true}. {eunit_opts, [verbose, {report,{eunit_surefire,[{dir,"."}]}}]}. 
diff --git a/root.xml b/root.xml deleted file mode 100644 index e0f7937..0000000 --- a/root.xml +++ /dev/null @@ -1,275 +0,0 @@ - - - - - - - - - - - - - - - - [เ-ไ ເ-ໄ] - - - ا - - آ - - أ - - إ - - و - ۥ - - ؤ - - ي - ۦ - - ئ - - ى - - - ه - - ة - - - ׳ - ״ - ـ - - - - - - - ᄀᄀ - - - ᄀᄉ - - - - ᄂᄌ - - ᄂᄒ - - - - ᄃᄃ - - - - ᄅᄀ - - ᄅᄆ - - ᄅᄇ - - ᄅᄉ - - ᄅᄐ - - ᄅᄑ - - ᄅᄒ - - - - - - ᄇᄇ - - ᄇᄉ - - - - ᄉᄉ - - - - - - - ᄌᄌ - - - - - - - - - - - - - ᅡᅵ - - ᅣᅵ - - ᅥᅵ - - ᅧᅵ - - ᅩᅡ - - ᅩᅡᅵ - - ᅩᅵ - - ᅮᅴ - - ᅮᅴᅵ - - ᅮᅵ - - - - - - - -

`

- -

´

- ΄ -

˜

-

^

- -

¯

- - - -

˘

-

˙

-

¨

-

˚

-

˝

-

- ᾿ -

-

¸

-

˛

- - -

-

-

-

-

-

-

-

-

-

-

-

-

-

-

﮿

-

-

-

-

- - -

- - -

- - 𐄁 -

𐄂

- - ﴿ -

§

-

- - -

©

-

®

- - -

- - -

- - ؊ -

- - ᰿ -

- - -

-

-

-

-

-

- - -

- - -

- - -

-

-

-

-

-

-

-

-

-

-

-

-

-

- - -

-

- - -

-

-

-

-

-

- - -

-

-

-

- - 𐩾 -

𐩿

- - - s - - یال -
-
-
- -
- - diff --git a/src/uca/ux.hrl b/src/uca/ux.hrl new file mode 100644 index 0000000..e760cfa --- /dev/null +++ b/src/uca/ux.hrl @@ -0,0 +1 @@ +-include("../ux.hrl"). diff --git a/src/ux_uca.hrl b/src/uca/ux_uca.hrl similarity index 70% rename from src/ux_uca.hrl rename to src/uca/ux_uca.hrl index 50293f8..02517c5 100644 --- a/src/ux_uca.hrl +++ b/src/uca/ux_uca.hrl @@ -23,59 +23,22 @@ %%% @end %%% ===================================================================== --type uca_alternate() :: - shifted - | shift_trimmed - | non_ignorable - | blanked - . - - --type uca_case_first() :: - lower - | upper - | off - . - --type uca_strength() :: - 1 | 2 | 3 | 4. - --type uca_sort_key_format() :: - binary - | list % comressed list of weights - | uncompressed % uncompressed list of weights - . - -% For hackers: -% In tr10 and ICU: -% a weight is a sort key! -% uca_weights is Collation Element (CE). -% uca_weight is just int. -% result is no in tr10. -% uca_elem is uca_weights + an variable flag (atom()). --type uca_weight() :: integer(). --type uca_elem() :: [atom()|uca_weight()]. --type uca_array() :: [uca_elem()]. --type result() :: {[uca_elem()], string()}. --type uca_weights() :: [uca_weight()]. - - % TERMINATOR < T < V < L -define(COL_HANGUL_TERMINATOR, 13000). % 12337 - 68 % Records -record(uca_options, { % Generator options - hangul_terminator = ?COL_HANGUL_TERMINATOR :: uca_weight(), + hangul_terminator = ?COL_HANGUL_TERMINATOR :: ux_uca:uca_weight(), natural_sort = true :: boolean(), - strength = 4 :: uca_strength(), - alternate = shifted :: uca_alternate(), + strength = 4 :: ux_uca:uca_strength(), + alternate = shifted :: ux_uca:uca_alternate(), case_sensitive = false, % move L3 to L1 if true - case_first = lower :: uca_case_first(), + case_first = lower :: ux_uca:uca_case_first(), backwards = false :: boolean(), % Other options - sort_key_format = binary :: uca_sort_key_format() + sort_key_format = binary :: ux_uca:uca_sort_key_format() }). 
diff --git a/src/ux_uca_alt.erl b/src/uca/ux_uca_alt.erl similarity index 98% rename from src/ux_uca_alt.erl rename to src/uca/ux_uca_alt.erl index 6f09395..9db926e 100644 --- a/src/ux_uca_alt.erl +++ b/src/uca/ux_uca_alt.erl @@ -14,7 +14,7 @@ get_alternate_function(C=#uca_options{alternate=Alt}, _D) -> get_function(Alt). --spec get_function(Alt :: uca_alternate()) -> fun(). +-spec get_function(Alt :: ux_uca:uca_alternate()) -> fun(). get_function(non_ignorable) -> fun non_ignorable_weight/1; get_function(blanked) -> diff --git a/src/ux_uca_compress.erl b/src/uca/ux_uca_compress.erl similarity index 98% rename from src/ux_uca_compress.erl rename to src/uca/ux_uca_compress.erl index 1e9aa77..207cc72 100644 --- a/src/ux_uca_compress.erl +++ b/src/uca/ux_uca_compress.erl @@ -1,8 +1,6 @@ -%%% @private -module(ux_uca_compress). -export([reassign_fun/3]). -include("ux.hrl"). --include("ux_unidata_server.hrl"). reassign_fun(_Lvl=1, _Min, Max) -> diff --git a/src/ux_uca_extract.erl b/src/uca/ux_uca_extract.erl similarity index 97% rename from src/ux_uca_extract.erl rename to src/uca/ux_uca_extract.erl index 24c357f..e22a93a 100644 --- a/src/ux_uca_extract.erl +++ b/src/uca/ux_uca_extract.erl @@ -1,18 +1,15 @@ -%%% @private -module(ux_uca_extract). -export([extract/3]). --include("ux_char.hrl"). --include("ux_string.hrl"). +-include("ux.hrl"). -include("ux_uca.hrl"). -%-define(DEBUG_DBG, ok). --ifdef(DEBUG_DBG). --define(DBG(X, Y), error_logger:info_msg(X, Y)). --else. --define(DBG(X, Y), ok). --endif. +-type uca_array() :: ux_uca:uca_array(). +-type uca_weight() :: ux_uca:uca_weight(). +-type uca_elem() :: ux_uca:uca_elem(). +-type result() :: ux_uca:uca_result(). +-type ux_ccc() :: ux_types:ux_ccc(). 
%% @doc MANUAL: %% S2.1 Find the longest initial substring S at each point @@ -381,13 +378,13 @@ do_extract1([H|T]=S, MFn, Key, OldCCC, Skipped, Res) end; -do_extract1([]=_S, MFn, Key, OldCCC, Skipped, _Res=more) +do_extract1([]=_S, _MFn, _Key, _OldCCC, Skipped, _Res=more) when is_list(Skipped) -> more_error; do_extract1([]=_S, _MFn, _Key, _OldCCC, Skipped, _Res=false) when is_list(Skipped) -> not_found; -do_extract1([]=_S, MFn, Key, OldCCC, Skipped, Res) +do_extract1([]=_S, _MFn, _Key, _OldCCC, Skipped, Res) when is_list(Skipped) -> {result, do_extract1_return(Res), lists:reverse(Skipped)}. diff --git a/src/ux_uca_options.erl b/src/uca/ux_uca_options.erl similarity index 100% rename from src/ux_uca_options.erl rename to src/uca/ux_uca_options.erl diff --git a/src/ux_uca_sort_key_binary.erl b/src/uca/ux_uca_sort_key_binary.erl similarity index 93% rename from src/ux_uca_sort_key_binary.erl rename to src/uca/ux_uca_sort_key_binary.erl index d83be27..8bfdeed 100644 --- a/src/ux_uca_sort_key_binary.erl +++ b/src/uca/ux_uca_sort_key_binary.erl @@ -1,11 +1,16 @@ -%%% @private -module(ux_uca_sort_key_binary). -export([sort_key/2]). -import(ux_uca, [sort_array/2]). +-import(ux_uca_utils, [ + do_alt/2, + get_ducet/0, + get_options/0, + split_levels/3, + get_reassign_function/2]). + -include("ux.hrl"). -include("ux_uca.hrl"). --include("ux_uca_common.hrl"). sort_key(C=#uca_options{strength=MaxLvl, backwards=B}, S) -> W = sort_array(C, S), diff --git a/src/ux_uca_sort_key_binary_cs.erl b/src/uca/ux_uca_sort_key_binary_cs.erl similarity index 94% rename from src/ux_uca_sort_key_binary_cs.erl rename to src/uca/ux_uca_sort_key_binary_cs.erl index f134a2b..fb81580 100644 --- a/src/ux_uca_sort_key_binary_cs.erl +++ b/src/uca/ux_uca_sort_key_binary_cs.erl @@ -1,13 +1,17 @@ %%% This module is a variant of ux_uca_sort_key_binary %%% for case sensative collation. -%%% @private -module(ux_uca_sort_key_binary_cs). -export([sort_key/2]). -import(ux_uca, [sort_array/2]). 
+-import(ux_uca_utils, [ + do_alt/2, + get_ducet/0, + get_options/0, + split_levels/3, + get_reassign_function/2]). -include("ux.hrl"). -include("ux_uca.hrl"). --include("ux_uca_common.hrl"). sort_key(C=#uca_options{strength=MaxLvl, backwards=B}, S) -> W = sort_array(C, S), diff --git a/src/ux_uca_sort_key_list.erl b/src/uca/ux_uca_sort_key_list.erl similarity index 91% rename from src/ux_uca_sort_key_list.erl rename to src/uca/ux_uca_sort_key_list.erl index 9679262..5b9fe52 100644 --- a/src/ux_uca_sort_key_list.erl +++ b/src/uca/ux_uca_sort_key_list.erl @@ -1,11 +1,16 @@ -%%% @private -module(ux_uca_sort_key_list). -export([sort_key/2]). -import(ux_uca, [sort_array/2]). +-import(ux_uca_utils, [ + do_alt/2, + get_ducet/0, + get_options/0, + split_levels/3, + get_reassign_function/2]). + -include("ux.hrl"). -include("ux_uca.hrl"). --include("ux_uca_common.hrl"). sort_key(C=#uca_options{strength=MaxLvl, backwards=B}, S) -> W = sort_array(C, S), diff --git a/src/ux_uca_sort_key_uncompressed.erl b/src/uca/ux_uca_sort_key_uncompressed.erl similarity index 91% rename from src/ux_uca_sort_key_uncompressed.erl rename to src/uca/ux_uca_sort_key_uncompressed.erl index 72702d5..d26ff38 100644 --- a/src/ux_uca_sort_key_uncompressed.erl +++ b/src/uca/ux_uca_sort_key_uncompressed.erl @@ -1,11 +1,16 @@ -%%% @private -module(ux_uca_sort_key_uncompressed). -export([sort_key/2]). -import(ux_uca, [sort_array/2]). +-import(ux_uca_utils, [ + do_alt/2, + get_ducet/0, + get_options/0, + split_levels/3, + get_reassign_function/2]). + -include("ux.hrl"). -include("ux_uca.hrl"). --include("ux_uca_common.hrl"). sort_key(C=#uca_options{strength=MaxLvl, backwards=B}, S) -> W = sort_array(C, S), diff --git a/src/ux_uca_common.hrl b/src/uca/ux_uca_utils.erl similarity index 85% rename from src/ux_uca_common.hrl rename to src/uca/ux_uca_utils.erl index eb97cbc..5e5d76b 100644 --- a/src/ux_uca_common.hrl +++ b/src/uca/ux_uca_utils.erl @@ -1,25 +1,38 @@ +%% Contains private common functions. 
+-module(ux_uca_utils). +-export([ + do_alt/2, + do_alt/3, + do_extract/3, + get_ducet/0, + get_options/0, + split_levels/3, + get_reassign_function/2]). + +-include("ux_uca.hrl"). + %% %% Helpers %% --spec do_alt(fun(), binary()|integer()) -> [integer()]. --spec get_ducet() -> fun(). --spec do_extract(#uca_options{}, string(), fun()) -> - {integer(), string()}. --spec get_options() -> #uca_options{}. +-spec do_alt(fun(), binary()|integer()) -> [integer()]. do_alt(A, W) -> A(W). do_alt(A, W, S) -> {NewA, AltW} = A(W), NewAltW = lists:sublist(AltW, S), {NewA, NewAltW}. +-spec get_ducet() -> fun(). get_ducet() -> ux_unidata:ducet(skip_check). +-spec do_extract(#uca_options{}, string(), fun()) -> + {integer(), string()}. %% Extract a weight from the string. %% Weights is [<>, <>]. do_extract(C, S, D) -> {_Weights, _NewS} = ux_uca_extract:extract(C, D, S). +-spec get_options() -> #uca_options{}. get_options() -> #uca_options{}. diff --git a/src/unidata/ux.hrl b/src/unidata/ux.hrl new file mode 100644 index 0000000..e760cfa --- /dev/null +++ b/src/unidata/ux.hrl @@ -0,0 +1 @@ +-include("../ux.hrl"). diff --git a/src/ux_unidata_filelist.erl b/src/unidata/ux_unidata_filelist.erl similarity index 99% rename from src/ux_unidata_filelist.erl rename to src/unidata/ux_unidata_filelist.erl index 7ec5c6e..03e7beb 100644 --- a/src/ux_unidata_filelist.erl +++ b/src/unidata/ux_unidata_filelist.erl @@ -3,7 +3,6 @@ %%% @private -module(ux_unidata_filelist). -include("ux.hrl"). --include("ux_unidata_server.hrl"). % OTP -export([start_link/0]). diff --git a/src/ux_unidata_parser.erl b/src/unidata/ux_unidata_parser.erl similarity index 99% rename from src/ux_unidata_parser.erl rename to src/unidata/ux_unidata_parser.erl index a5db983..219f8e9 100644 --- a/src/ux_unidata_parser.erl +++ b/src/unidata/ux_unidata_parser.erl @@ -4,7 +4,6 @@ %%% @private -module(ux_unidata_parser). -include("ux.hrl"). --include("ux_unidata_server.hrl"). 
-export([run/1, check/1, get_functions/2, get_env/1, set_env/2]). -export([check_types/2]). diff --git a/src/ux_unidata_parser_allkeys.erl b/src/unidata/ux_unidata_parser_allkeys.erl similarity index 98% rename from src/ux_unidata_parser_allkeys.erl rename to src/unidata/ux_unidata_parser_allkeys.erl index 6e4e536..de4f4b2 100644 --- a/src/ux_unidata_parser_allkeys.erl +++ b/src/unidata/ux_unidata_parser_allkeys.erl @@ -3,8 +3,7 @@ %%% @private -module(ux_unidata_parser_allkeys). -include("ux.hrl"). --include("ux_string.hrl"). --include("ux_unidata_server.hrl"). +%-include("ux_string.hrl"). -export([parse/1, types/0, get_function/2 @@ -21,8 +20,10 @@ parse(In) -> OutEl = parse_el(ux_unidata_parser:delete_spaces(Element)), %io:format("String: ~ts, From reversed: ~w, To: ~w~n", [In, InEl, OutEl]), - Res = case InEl of [] -> skip; - _ -> {InEl, OutEl} end, + Res = case InEl of + [] -> skip; + _ -> {InEl, OutEl} + end, {ok, [{ducet, Res} ] diff --git a/src/ux_unidata_parser_blocks.erl b/src/unidata/ux_unidata_parser_blocks.erl similarity index 100% rename from src/ux_unidata_parser_blocks.erl rename to src/unidata/ux_unidata_parser_blocks.erl diff --git a/src/ux_unidata_parser_comp_exclusions.erl b/src/unidata/ux_unidata_parser_comp_exclusions.erl similarity index 100% rename from src/ux_unidata_parser_comp_exclusions.erl rename to src/unidata/ux_unidata_parser_comp_exclusions.erl diff --git a/src/ux_unidata_parser_grapheme_break_property.erl b/src/unidata/ux_unidata_parser_grapheme_break_property.erl similarity index 100% rename from src/ux_unidata_parser_grapheme_break_property.erl rename to src/unidata/ux_unidata_parser_grapheme_break_property.erl diff --git a/src/ux_unidata_parser_norm_props.erl b/src/unidata/ux_unidata_parser_norm_props.erl similarity index 100% rename from src/ux_unidata_parser_norm_props.erl rename to src/unidata/ux_unidata_parser_norm_props.erl diff --git a/src/ux_unidata_parser_scripts.erl b/src/unidata/ux_unidata_parser_scripts.erl similarity 
index 100% rename from src/ux_unidata_parser_scripts.erl rename to src/unidata/ux_unidata_parser_scripts.erl diff --git a/src/ux_unidata_parser_unidata.erl b/src/unidata/ux_unidata_parser_unidata.erl similarity index 100% rename from src/ux_unidata_parser_unidata.erl rename to src/unidata/ux_unidata_parser_unidata.erl diff --git a/src/ux_unidata_parser_word_break_property.erl b/src/unidata/ux_unidata_parser_word_break_property.erl similarity index 100% rename from src/ux_unidata_parser_word_break_property.erl rename to src/unidata/ux_unidata_parser_word_break_property.erl diff --git a/src/ux_unidata_server.erl b/src/unidata/ux_unidata_server.erl similarity index 99% rename from src/ux_unidata_server.erl rename to src/unidata/ux_unidata_server.erl index dd875cf..9edf734 100644 --- a/src/ux_unidata_server.erl +++ b/src/unidata/ux_unidata_server.erl @@ -13,7 +13,6 @@ %%% @private -module(ux_unidata_server). -include("ux.hrl"). --include("ux_unidata_server.hrl"). -export([start_link/0]). -export([init/1, terminate/2, diff --git a/src/ux_unidata_store.erl b/src/unidata/ux_unidata_store.erl similarity index 99% rename from src/ux_unidata_store.erl rename to src/unidata/ux_unidata_store.erl index 8a5e674..1d4b78d 100644 --- a/src/ux_unidata_store.erl +++ b/src/unidata/ux_unidata_store.erl @@ -1,7 +1,6 @@ %%% @private -module(ux_unidata_store). -include("ux.hrl"). --include("ux_unidata_server.hrl"). -export([start_link/2]). -export([init/1, terminate/2, handle_call/3, handle_cast/2, handle_info/2]). 
diff --git a/src/ux_unidata_store_sup.erl b/src/unidata/ux_unidata_store_sup.erl similarity index 100% rename from src/ux_unidata_store_sup.erl rename to src/unidata/ux_unidata_store_sup.erl diff --git a/src/ux_opt_ranges.erl b/src/utils/ux_opt_ranges.erl similarity index 99% rename from src/ux_opt_ranges.erl rename to src/utils/ux_opt_ranges.erl index 7ba0cb7..f9d8faa 100644 --- a/src/ux_opt_ranges.erl +++ b/src/utils/ux_opt_ranges.erl @@ -12,7 +12,6 @@ %% `search' function is for the second type. %% %% @end -%% @private -module(ux_opt_ranges). -export([in_list/1, search/2]). diff --git a/src/ux_ranges.erl b/src/utils/ux_ranges.erl similarity index 98% rename from src/ux_ranges.erl rename to src/utils/ux_ranges.erl index 2df87a0..fcbacc7 100644 --- a/src/ux_ranges.erl +++ b/src/utils/ux_ranges.erl @@ -12,7 +12,6 @@ %% `search' function is for the second type. %% %% @end -%% @private -module(ux_ranges). -export([in_list/2,search/2]). diff --git a/src/ux.hrl b/src/ux.hrl index 7ed859e..632cbe2 100644 --- a/src/ux.hrl +++ b/src/ux.hrl @@ -1,3 +1,108 @@ %-define(SLOW_TESTS, true). %-define(UNIDATA_DEBUG, false). +-ifdef(UNIDATA_DEBUG). +-define(DBG(X,Y), error_logger:info_msg(X,Y)). +-else. +-define(DBG(X,Y), ok). +-endif. + +-define(UNIDATA, ux_unidata). + + +% Defines Hangul constants +% Hangul characters can be decomposed into LV or LVT forms. + +-define(HANGUL_SBASE, 16#AC00). +-define(HANGUL_LBASE, 16#1100). % 4352 - 4371 +-define(HANGUL_VBASE, 16#1161). % 4449 - 4470 +-define(HANGUL_TBASE, 16#11A7). % 4519 - 4547 +-define(HANGUL_LCOUNT, 19). +-define(HANGUL_VCOUNT, 21). +-define(HANGUL_TCOUNT, 28). +-define(HANGUL_NCOUNT, 588). +-define(HANGUL_SCOUNT, 11172). + +-define(HANGUL_SLAST, ?HANGUL_SBASE + ?HANGUL_SCOUNT). +-define(HANGUL_LLAST, ?HANGUL_LBASE + ?HANGUL_LCOUNT). +-define(HANGUL_VLAST, ?HANGUL_VBASE + ?HANGUL_VCOUNT). +-define(HANGUL_TLAST, ?HANGUL_TBASE + ?HANGUL_TCOUNT).
+ +-define(CHAR_IS_HANGUL_L(Ch), ( + (Ch>=?HANGUL_LBASE) and (Ch==?HANGUL_VBASE) and (Ch==?HANGUL_TBASE) and (Ch==$1 andalso Ch=<$0)). + + + + + +% CJK_Unified_Ideograph and CJK_Compatibility_Ideographs from +% http://www.unicode.org/Public/UNIDATA/Blocks.txt +-define(CHAR_IS_CJK_UNIFIED_IDEOGRAPH(Ch), ( + (Ch >= 16#4E00) and (Ch =< 16#9FFF) % CJK Unified Ideographs +)). +-define(CHAR_IS_CJK_COMPATIBILITY_IDEOGRAPH(Ch), ( + (Ch >= 16#F900) and (Ch =< 16#FAFF) % CJK Compatibility Ideographs +)). + +% Unified_Ideograph from http://unicode.org/Public/UNIDATA/PropList.txt +-define(CHAR_IS_UNIFIED_IDEOGRAPH(Ch), ( +% [6582] CJK UNIFIED IDEOGRAPH-3400..4DB5 + ((Ch >= 16#3400) and (Ch =< 16#4DB5)) + +% [20940] CJK UNIFIED IDEOGRAPH-4E00..9FCB +or ((Ch >= 16#4E00) and (Ch =< 16#9FCB)) +% FIXED: Error: [55296,33] lower [40908,98] +% CJK Unified Ideographs +%or ((Ch >= 16#4E00) and (Ch =< 16#9FFF)) + +% [2] CJK COMPATIBILITY IDEOGRAPH-FA0E..FA0F + or ((Ch >= 16#FA0E) and (Ch =< 16#FA0F)) + + or ((Ch == 16#FA11) ) % CJK COMPATIBILITY IDEOGRAPH-FA11 + +% [2] CJK COMPATIBILITY IDEOGRAPH-FA13..FA14 + or ((Ch >= 16#FA13) and (Ch =< 16#FA14)) + + or ((Ch == 16#FA1F) ) % CJK COMPATIBILITY IDEOGRAPH-FA1F + or ((Ch == 16#FA21) ) % CJK COMPATIBILITY IDEOGRAPH-FA21 + +% [2] CJK COMPATIBILITY IDEOGRAPH-FA23..FA24 + or ((Ch >= 16#FA23) and (Ch =< 16#FA24)) + +% [3] CJK COMPATIBILITY IDEOGRAPH-FA27..FA29 + or ((Ch >= 16#FA27) and (Ch =< 16#FA29)) + +% [42711] CJK UNIFIED IDEOGRAPH-20000..2A6D6 + or ((Ch >= 16#20000) and (Ch =< 16#2A6D6)) + +% [4149] CJK UNIFIED IDEOGRAPH-2A700..2B734 + or ((Ch >= 16#2A700) and (Ch =< 16#2B734)) + +% [222] CJK UNIFIED IDEOGRAPH-2B740..2B81D + or ((Ch >= 16#2B740) and (Ch =< 16#2B81D)) +)). 
+ + +-define(CHAR_IS_HANGUL(Char), + Char>=16#1100, Char=<16#11FF % Hangul Jamo + ; Char>=16#A960, Char=<16#A97C % Hangul Jamo Extended-A + ; Char>=16#D7B0, Char=<16#D7C6 % Hangul Jamo Extended-B + ; Char>=16#D7CB, Char=<16#D7FB % Hangul Jamo Extended-B + ; Char>=16#3131, Char=<16#318E % Hangul Compatibility Jamo + ; Char==16#302E; Char==16#302F % Tone marks (used in Middle Korean) + ; Char>=16#AC00, Char=<16#D7A3 % 11,172 precomposed Hangul syllables + ; Char>=16#3200, Char=<16#321E % For parenthesised + ; Char>=16#3260, Char=<16#327E % and circled + ; Char>=16#FFA0, Char=<16#FFDC % For halfwidth (U+FFA0..U+FFDC) +). diff --git a/src/ux_break.erl b/src/ux_break.erl deleted file mode 100644 index 5d4b660..0000000 --- a/src/ux_break.erl +++ /dev/null @@ -1 +0,0 @@ --module(ux_break). diff --git a/src/ux_char.erl b/src/ux_char.erl index 9f52708..0b3b581 100644 --- a/src/ux_char.erl +++ b/src/ux_char.erl @@ -38,8 +38,10 @@ is_unified_ideograph/1, is_hangul_precomposed/1 ]). --include("ux_char.hrl"). --include("ux_unidata.hrl"). +-include("ux.hrl"). + +-type char_type() :: ux_types:char_type(). + -spec to_lower(char()) -> char(); (skip_check) -> fun(). diff --git a/src/ux_char.hrl b/src/ux_char.hrl deleted file mode 100644 index 40d034a..0000000 --- a/src/ux_char.hrl +++ /dev/null @@ -1,124 +0,0 @@ -% vim: set filetype=erlang shiftwidth=4 tabstop=4 expandtab tw=80: -%%% ===================================================================== -%%% This library is free software; you can redistribute it and/or modify -%%% it under the terms of the GNU Lesser General Public License as -%%% published by the Free Software Foundation; either version 2 of the -%%% License, or (at your option) any later version. -%%% -%%% This library is distributed in the hope that it will be useful, but -%%% WITHOUT ANY WARRANTY; without even the implied warranty of -%%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -%%% Lesser General Public License for more details.
-%%% -%%% You should have received a copy of the GNU Lesser General Public -%%% License along with this library; if not, write to the Free Software -%%% Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 -%%% USA -%%% -%%% $Id$ -%%% -%%% @copyright 2010-2011 Michael Uvarov -%%% @author Michael Uvarov -%%% @see ux -%%% @end -%%% ===================================================================== - --type char_type() :: -% Normative Categories: - lu % Letter, Uppercase - | ll % Letter, Lowercase - | lt % Letter, Titlecase - | mn % Mark, Non-Spacing - | mc % Mark, Spacing Combining - | me % Mark, Enclosing - | nd % Number, Decimal Digit - | nl % Number, Letter - | no % Number, Other - | zs % Separator, Space - | zl % Separator, Line - | zp % Separator, Paragraph - | cc % Other, Control - | cf % Other, Format - | cs % Other, Surrogate - | co % Other, Private Use - | cn % Other, Not Assigned (no characters in the file have this property) -% Informative Categories: - | lm % Letter, Modifier - | lo % Letter, Other - | pc % Punctuation, Connector - | pd % Punctuation, Dash - | ps % Punctuation, Open - | pe % Punctuation, Close - | pi % Punctuation, Initial quote (may behave like Ps or Pe depending on - % usage) - | pf % Punctuation, Final quote (may behave like Ps or Pe depending on usage) - | po % Punctuation, Other - | sm % Symbol, Math - | sc % Symbol, Currency - | sk % Symbol, Modifier - | so % Symbol, Other - | other -. - --type ux_ccc() :: 0..240. - -% CJK_Unified_Ideograph and CJK_Compatibility_Ideographs from -% http://www.unicode.org/Public/UNIDATA/Blocks.txt --define(CHAR_IS_CJK_UNIFIED_IDEOGRAPH(Ch), ( - (Ch >= 16#4E00) and (Ch =< 16#9FFF) % CJK Unified Ideographs -)). --define(CHAR_IS_CJK_COMPATIBILITY_IDEOGRAPH(Ch), ( - (Ch >= 16#F900) and (Ch =< 16#FAFF) % CJK Compatibility Ideographs -)). 
- -% Unified_Ideograph from http://unicode.org/Public/UNIDATA/PropList.txt --define(CHAR_IS_UNIFIED_IDEOGRAPH(Ch), ( -% [6582] CJK UNIFIED IDEOGRAPH-3400..4DB5 - ((Ch >= 16#3400) and (Ch =< 16#4DB5)) - -% [20940] CJK UNIFIED IDEOGRAPH-4E00..9FCB -or ((Ch >= 16#4E00) and (Ch =< 16#9FCB)) -% FIXED: Error: [55296,33] lower [40908,98] -% CJK Unified Ideographs -%or ((Ch >= 16#4E00) and (Ch =< 16#9FFF)) - -% [2] CJK COMPATIBILITY IDEOGRAPH-FA0E..FA0F - or ((Ch >= 16#FA0E) and (Ch =< 16#FA0F)) - - or ((Ch == 16#FA11) ) % CJK COMPATIBILITY IDEOGRAPH-FA11 - -% [2] CJK COMPATIBILITY IDEOGRAPH-FA13..FA14 - or ((Ch >= 16#FA13) and (Ch =< 16#FA14)) - - or ((Ch == 16#FA1F) ) % CJK COMPATIBILITY IDEOGRAPH-FA1F - or ((Ch == 16#FA21) ) % CJK COMPATIBILITY IDEOGRAPH-FA21 - -% [2] CJK COMPATIBILITY IDEOGRAPH-FA23..FA24 - or ((Ch >= 16#FA23) and (Ch =< 16#FA24)) - -% [3] CJK COMPATIBILITY IDEOGRAPH-FA27..FA29 - or ((Ch >= 16#FA27) and (Ch =< 16#FA29)) - -% [42711] CJK UNIFIED IDEOGRAPH-20000..2A6D6 - or ((Ch >= 16#20000) and (Ch =< 16#2A6D6)) - -% [4149] CJK UNIFIED IDEOGRAPH-2A700..2B734 - or ((Ch >= 16#2A700) and (Ch =< 16#2B734)) - -% [222] CJK UNIFIED IDEOGRAPH-2B740..2B81D - or ((Ch >= 16#2B740) and (Ch =< 16#2B81D)) -)). - - --define(CHAR_IS_HANGUL(Char), - Char>=16#1100, Char=<16#11FF % Hangul Jamo - ; Char>=16#A960, Char=<16#A97C % Hangul Jamo Extended-A - ; Char>=16#D7B0, Char=<16#D7C6 % Hangul Jamo Extended-B - ; Char>=16#D7CB, Char=<16#D7FB % Hangul Jamo Extended-B - ; Char>=16#3131, Char=<16#318E % Hangul Compatibility Jamo - ; Char==17#302E; Char==16#302F % Tone marks (used in Middle Korean) - ; Char>=16#AC00, Char=<16#D7A3 % 11,172 precomposed Hangul syllables - ; Char>=16#3200, Char=<16#321E % For parenthesised - ; Char>=16#3260, Char=<16#327E % and circled - ; Char>=16#FFDC, Char=<16#FFA0 % For halfwidth -). diff --git a/src/ux_gb.erl b/src/ux_gb.erl index f98597b..fa5ea6b 100644 --- a/src/ux_gb.erl +++ b/src/ux_gb.erl @@ -5,7 +5,7 @@ %%% @end -module(ux_gb). 
--include("ux_char.hrl"). +-include("ux.hrl"). -export([split/2]). diff --git a/src/ux_html.erl b/src/ux_html.erl deleted file mode 100644 index f2b22d5..0000000 --- a/src/ux_html.erl +++ /dev/null @@ -1,261 +0,0 @@ --module(ux_html). --export([ - encode/1, - strip_tags/1, strip_tags/2, strip_tags/3]). - - -%% @doc Encodes html special chars. --spec encode(string()) -> string(). - -encode(Str) -> encode(Str, []). - -%% @private -encode([ ], Buf) -> lists:reverse(Buf); -encode([$" | T], Buf) -> encode(T, lists:reverse(""", Buf)); -encode([$' | T], Buf) -> encode(T, lists:reverse("'", Buf)); -encode([$& | T], Buf) -> encode(T, lists:reverse("&", Buf)); -encode([$< | T], Buf) -> encode(T, lists:reverse("<", Buf)); -encode([$> | T], Buf) -> encode(T, lists:reverse(">", Buf)); -encode([H | T], Buf) -> encode(T, [H|Buf]). - - - -%% @doc Deletes tags from the string. -%% -%% Example: -%% ```> ux_html:strip_tags("some string"). -%% "some string" -%% > ux_html:strip_tags("

Head

and paragraf

", ["h1"]). -%% "

Head

and paragraf" -%% ux_html:strip_tags("

Head

", ["!--"]). -%% "Head" -%% ux_html:st("a
b", [], " "). -%% "a b"''' -%% @end --spec strip_tags(string()) -> string(). - -strip_tags(Str) -> - st(Str, []). - - --spec strip_tags(string, [string() | atom() | char()]) -> string(). - -strip_tags(Str, Allowed) -> - st(Str, Allowed). - - - --spec strip_tags(string, [string() | atom() | char()], string()) -> string(). - -strip_tags(Str, Allowed, Alt) -> - st(Str, Allowed, Alt). - - -%% @see ux_html:strip_tags/1 -%% @private -st(Str) -> st_cycle(Str, [], 0, []). -%% @see ux_html:strip_tags/2 -%% @private -st(Str, []) -> st(Str); -st(Str, [$<|Allowed]) -> st(Str, tags_to_list(Allowed)); -st(Str, Allowed) -> st(Str, Allowed, []). -%% @see ux_html:strip_tags/3 -%% @private -st(Str, [], []) -> st(Str); -st(Str, [$<|Allowed], Alt) -> st(Str, tags_to_list(Allowed), Alt); -st(Str, [], Alt) -> st_cycle(Str, [], 0, lists:reverse(Alt)); -st(Str, Allowed, Alt) -> - Fun = ux_char:to_lower(skip_check), - st_cycle_with_allowed(Str, [], - lists:map(fun lists:reverse/1, - lists:map(Fun, - lists:map(fun ux_string:to_string/1, Allowed))), - lists:reverse(Alt)). - -%% @doc Drops all tags from the string. -%% ```Cnt is a count of not closed < -%% If we found <, then Cnt++ -%% If we found >, then Cnt--''' -%% @end -%% @private -st_cycle([$<| Tail], Buf, Cnt, Alt) -> st_cycle(Tail, Buf, Cnt + 1, Alt); -st_cycle([$>| Tail], Buf, 1, Alt) -> st_cycle(Tail, Alt ++ Buf, 0, Alt); -st_cycle([$>| Tail], Buf, 0, Alt) -> st_cycle(Tail, Buf, 0, Alt); -st_cycle([$>| Tail], Buf, Cnt, Alt) -> st_cycle(Tail, Buf, Cnt - 1, Alt); -st_cycle([H | Tail], Buf, 0, Alt) -> st_cycle(Tail, [H | Buf] , 0, Alt); -st_cycle([_ | Tail], Buf, Cnt, Alt) -> st_cycle(Tail, Buf, Cnt, Alt); -st_cycle([ ], Buf, _, _ ) -> lists:reverse(Buf). - -%% @doc Is used by st_cycle_with_allowed -%% @private -%% If Flag = false, then don't append chars (as name of a tag ). -%% If Flag = true (default), then append chars (as the body of the tag). 
-%% Cnt is a level of subtag (` Cnt=1 Cnt=2 Cnt=1') -%% Returns: {tag_name, tag_body, string_tail} -st_get_tag([$>|T], Buf, Tag, _Flag, 1) -> - {Tag, [$>|Buf], T}; -st_get_tag([$>|T], Buf, Tag, _Flag, Cnt) -> - st_get_tag(T, Buf, Tag, false, Cnt - 1); -st_get_tag([$<|T], Buf, Tag, false, Cnt) -> - st_get_tag(T, Buf, Tag, false, Cnt + 1); -st_get_tag([$ |T], Buf, Tag, _, Cnt) -> - st_get_tag(T, [$ |Buf], Tag, false, Cnt); -st_get_tag([$/|T], Buf, Tag, true, Cnt) -> - st_get_tag(T, [$/|Buf], Tag, true, Cnt); -st_get_tag([H|T], Buf, Tag, true, Cnt) -> - st_get_tag(T, [H|Buf], [H|Tag], true, Cnt); -% TODO: control atributes (onclick, for example. xss fix!) -st_get_tag([H|T], Buf, Tag, false, Cnt) -> - st_get_tag(T, [H|Buf], Tag, false, Cnt); -st_get_tag([], _, _, _, _) -> false; -st_get_tag(_, [], _, _, _) -> false. - -%% @doc Drops tags, but saves tags in the Allowed list. -%% @private -st_cycle_with_allowed([$<|T], Res, Allowed, Alt) -> - case st_get_tag(T, [$<], [], true, 1) of - {Tag, SubStr, Tail} -> - case lists:member(string:to_lower(Tag), Allowed) of - true -> st_cycle_with_allowed(Tail, - SubStr ++ Res, Allowed, Alt); % Allowed tag - false -> st_cycle_with_allowed(Tail, - Alt ++ Res, Allowed, Alt) % Alt is replacement - end; - _ -> lists:reverse(Res) % deletes unclosed string - end; -st_cycle_with_allowed([$>|T], Res, Allowed, Alt) -> - st_cycle_with_allowed(T, Res, Allowed, Alt); -st_cycle_with_allowed([Ch|T], Res, Allowed, Alt) -> - st_cycle_with_allowed(T, [Ch | Res], Allowed, Alt); -st_cycle_with_allowed([], Res, _, _) -> lists:reverse(Res). - -%% @doc Convert string of tags to list -%% Example: -%% ```> tags_to_list(""). -%% ["a", "b"]''' -%% @end -%% @private -tags_to_list(Str) -> tags_to_list(Str, [], []). 
- -%% @private -tags_to_list([$<|Str], Res, Buf) -> tags_to_list(Str, Res, Buf); -tags_to_list([$/|Str], Res, Buf) -> tags_to_list(Str, Res, Buf); -tags_to_list([$>|Str], Res, Buf) -> tags_to_list(Str, - [lists:reverse(Buf)|Res], []); -tags_to_list([Ch|Str], Res, Buf) -> tags_to_list(Str, Res, [Ch|Buf]); -tags_to_list([], Res, _) -> Res. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -%% -%% Tests -%% - --ifdef(TEST). --include_lib("eunit/include/eunit.hrl"). - - -tags_to_list_test_() -> - F = fun tags_to_list/1, - [?_assertEqual(F(""), ["b", "a"]) - ,?_assertEqual(F(""), ["span"]) - ,?_assertEqual(F(""), ["span", "b"]) - ,?_assertEqual(F(""), ["i"]) - ]. - - - -html_special_chars_test_() -> - M = 'ux_html', - F = 'encode', - [?_assertEqual(M:F("ddf2#$\""), "ddf2#$"") - ,?_assertEqual(M:F("test1 & test2"), "test1 & test2") - - ,?_assertEqual(M:F(""), "") - ]. - -strip_tags_test_() -> - M = 'ux_html', - F = 'strip_tags', - [?_assertEqual(M:F("a"), "a") - ,?_assertEqual(M:F("a b c"), "a b c") - ,?_assertEqual(M:F("a b c"), "a b c") - ,?_assertEqual(M:F("a b c"), "a b c") - ,{"Check a long tag." - ,[?_assertEqual(M:F("

A B C

"), "A B C") - ,?_assertEqual(M:F("ab"), "ab")]} - ,{"Check allowed tags." - ,[?_assertEqual(M:F("a b c", ["b"]), "a b c") - ,?_assertEqual(M:F("a b c", ["b"]), "a b c") - ,?_assertEqual(M:F("a b c", ["b"]), "a b c") - ,?_assertEqual(M:F("a b c", ["b", "code"]), "a b c") - ,?_assertEqual(M:F("a b c", ["b", "span"]), "a b c") - ]} - ,{"Check a tag with an attribute." - ,[?_assertEqual(M:F("ab", ["b"]), "ab") - ,?_assertEqual(M:F("ab", ["img"]), "ab") - ,?_assertEqual(M:F("a
b", ["br"]), "a
b")]} - ,{"Check an atom in the list allowed tags." - ,[?_assertEqual(M:F("a
b", [br]), "a
b") - ,?_assertEqual(M:F("a
b", [br]), "a
b")]} - ,{"Check a replacement argument." - ,[?_assertEqual(M:F("a b c", [], " "), " a b c ") - ,?_assertEqual(M:F("a b c", [], "tag"), "taga b ctag") - ,?_assertEqual(M:F("a b c", [test], "tag"), "taga b ctag")]} - ,{"PHP format." - ,[?_assertEqual(M:F("a b c", ""), "a b c") - ,?_assertEqual(M:F("a b c", ""), "a b c") - ,?_assertEqual(M:F("
testtest") - ,?_assertEqual(M:F("test"), "test")]} - - ,{"Empty string." - ,[?_assertEqual(M:F("", ""), "") - ,?_assertEqual(M:F("", ""), "") - ,?_assertEqual(M:F("", "a"), "") - ,?_assertEqual(M:F("<", ""), "")]} - ]. - --endif. diff --git a/src/ux_string.erl b/src/ux_string.erl index 1b6a34c..156b6c9 100644 --- a/src/ux_string.erl +++ b/src/ux_string.erl @@ -55,9 +55,7 @@ -include("ux.hrl"). --include("ux_string.hrl"). --include("ux_unidata.hrl"). --include("ux_char.hrl"). +-type char_type() :: ux_types:char_type(). diff --git a/src/ux_string.hrl b/src/ux_string.hrl deleted file mode 100644 index 6a0f79a..0000000 --- a/src/ux_string.hrl +++ /dev/null @@ -1,57 +0,0 @@ -%% vim: set filetype=erlang shiftwidth=4 tabstop=4 expandtab tw=80: -%%% ===================================================================== -%%% This library is free software; you can redistribute it and/or modify -%%% it under the terms of the GNU Lesser General Public License as -%%% published by the Free Software Foundation; either version 2 of the -%%% License, or (at your option) any later version. -%%% -%%% This library is distributed in the hope that it will be useful, but -%%% WITHOUT ANY WARRANTY; without even the implied warranty of -%%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -%%% Lesser General Public License for more details. -%%% -%%% You should have received a copy of the GNU Lesser General Public -%%% License along with this library; if not, write to the Free Software -%%% Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 -%%% USA -%%% -%%% $Id$ -%%% -%%% @copyright 2010-2011 Michael Uvarov -%%% @author Michael Uvarov -%%% @see ux -%%% @end -%%% ===================================================================== - - -% Defines Hangul constants -% Hangul characters can be decompize to LV or LVT forms. - --define(HANGUL_SBASE, 16#AC00). --define(HANGUL_LBASE, 16#1100). % 4352 - 4371 --define(HANGUL_VBASE, 16#1161). 
% 4449 - 4470 --define(HANGUL_TBASE, 16#11A7). % 4519 - 4547 --define(HANGUL_LCOUNT, 19). --define(HANGUL_VCOUNT, 21). --define(HANGUL_TCOUNT, 28). --define(HANGUL_NCOUNT, 588). --define(HANGUL_SCOUNT, 11172). - --define(HANGUL_SLAST, ?HANGUL_SBASE + ?HANGUL_SCOUNT). --define(HANGUL_LLAST, ?HANGUL_LBASE + ?HANGUL_LCOUNT). --define(HANGUL_VLAST, ?HANGUL_VBASE + ?HANGUL_VCOUNT). --define(HANGUL_TLAST, ?HANGUL_TBASE + ?HANGUL_TCOUNT). - --define(CHAR_IS_HANGUL_L(Ch), ( - (Ch>=?HANGUL_LBASE) and (Ch==?HANGUL_VBASE) and (Ch==?HANGUL_TBASE) and (Ch==$1 andalso Ch=<$0)). diff --git a/src/ux_types.erl b/src/ux_types.erl new file mode 100644 index 0000000..fd2b3f2 --- /dev/null +++ b/src/ux_types.erl @@ -0,0 +1,43 @@ +-module(ux_types). + +-type char_type() :: +% Normative Categories: + lu % Letter, Uppercase + | ll % Letter, Lowercase + | lt % Letter, Titlecase + | mn % Mark, Non-Spacing + | mc % Mark, Spacing Combining + | me % Mark, Enclosing + | nd % Number, Decimal Digit + | nl % Number, Letter + | no % Number, Other + | zs % Separator, Space + | zl % Separator, Line + | zp % Separator, Paragraph + | cc % Other, Control + | cf % Other, Format + | cs % Other, Surrogate + | co % Other, Private Use + | cn % Other, Not Assigned (no characters in the file have this property) +% Informative Categories: + | lm % Letter, Modifier + | lo % Letter, Other + | pc % Punctuation, Connector + | pd % Punctuation, Dash + | ps % Punctuation, Open + | pe % Punctuation, Close + | pi % Punctuation, Initial quote (may behave like Ps or Pe depending on + % usage) + | pf % Punctuation, Final quote (may behave like Ps or Pe depending on usage) + | po % Punctuation, Other + | sm % Symbol, Math + | sc % Symbol, Currency + | sk % Symbol, Modifier + | so % Symbol, Other + | other +. + +-type ux_ccc() :: 0..240. + +-export_type([char_type/0, + ux_ccc/0]). 
diff --git a/src/ux_uca.erl b/src/ux_uca.erl index 175c170..95d48cc 100644 --- a/src/ux_uca.erl +++ b/src/ux_uca.erl @@ -160,10 +160,62 @@ % get_options/2 ]). +-import(ux_uca_utils, [ + do_alt/2, + do_alt/3, + do_extract/3, + get_ducet/0, + get_options/0, + split_levels/3, + get_reassign_function/2]). + + +-type uca_alternate() :: + shifted + | shift_trimmed + | non_ignorable + | blanked + . + + +-type uca_case_first() :: + lower + | upper + | off + . + +-type uca_strength() :: + 1 | 2 | 3 | 4. + +-type uca_sort_key_format() :: + binary + | list % compressed list of weights + | uncompressed % uncompressed list of weights + . + +% For hackers: +% In tr10 and ICU: +% a weight is a sort key! +% uca_weights is Collation Element (CE). +% uca_weight is just int. +% result is not in tr10. +% uca_elem is uca_weights + a variable flag (atom()). +-type uca_weight() :: integer(). +-type uca_elem() :: [atom()|uca_weight()]. +-type uca_array() :: [uca_elem()]. +-type result() :: {[uca_elem()], string()}. +-type uca_weights() :: [uca_weight()]. + +-export_type([uca_alternate/0, + uca_case_first/0, + uca_strength/0, + uca_sort_key_format/0, + uca_weight/0, + uca_elem/0, + uca_array/0, + result/0, + uca_weights/0]). --include("ux.hrl"). --include("ux_uca.hrl"). --include("ux_uca_common.hrl"). -type uca_compare_result() :: lower @@ -173,6 +225,9 @@ -type uca_generator() :: fun(). +-include("ux.hrl"). +-include("uca/ux_uca.hrl"). + -spec compare(string(), string()) -> uca_compare_result(). %% @doc Compare two strings and return: lower, greater or equal. diff --git a/src/ux_unidata.erl b/src/ux_unidata.erl index 4f4fed9..bb8b30b 100644 --- a/src/ux_unidata.erl +++ b/src/ux_unidata.erl @@ -55,7 +55,7 @@ break_props/1]). -include("ux_unidata.hrl"). --include("ux_char.hrl"). +-type ux_ccc() :: ux_types:ux_ccc().
diff --git a/src/ux_unidata.hrl b/src/ux_unidata.hrl deleted file mode 100644 index 04586d9..0000000 --- a/src/ux_unidata.hrl +++ /dev/null @@ -1,30 +0,0 @@ -% vim: set filetype=erlang shiftwidth=4 tabstop=4 expandtab tw=80: -%%% ===================================================================== -%%% This library is free software; you can redistribute it and/or modify -%%% it under the terms of the GNU Lesser General Public License as -%%% published by the Free Software Foundation; either version 2 of the -%%% License, or (at your option) any later version. -%%% -%%% This library is distributed in the hope that it will be useful, but -%%% WITHOUT ANY WARRANTY; without even the implied warranty of -%%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -%%% Lesser General Public License for more details. -%%% -%%% You should have received a copy of the GNU Lesser General Public -%%% License along with this library; if not, write to the Free Software -%%% Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 -%%% USA -%%% -%%% $Id$ -%%% -%%% @copyright 2010-2011 Michael Uvarov -%%% @author Michael Uvarov -%%% @see ux -%%% @end -%%% ===================================================================== - - --define(UNIDATA_VERSION, "UNIDATA"). --define(UCADATA_VERSION, "UCA"). - --define(UNIDATA, ux_unidata). diff --git a/src/ux_unidata_server.hrl b/src/ux_unidata_server.hrl deleted file mode 100644 index 2f3e49d..0000000 --- a/src/ux_unidata_server.hrl +++ /dev/null @@ -1,5 +0,0 @@ --ifdef(UNIDATA_DEBUG). --define(DBG(X,Y), error_logger:info_msg(X,Y)). --else. --define(DBG(X,Y), ok). --endif. diff --git a/src/ux_wb.erl b/src/ux_wb.erl index 2761926..1c6f86c 100644 --- a/src/ux_wb.erl +++ b/src/ux_wb.erl @@ -5,7 +5,7 @@ %%% @end -module(ux_wb). --include("ux_char.hrl"). +-include("ux.hrl"). -export([split/1, words/1]). 
diff --git a/start-dev.sh b/start-dev.sh index 32d1905..7426c21 100755 --- a/start-dev.sh +++ b/start-dev.sh @@ -5,5 +5,5 @@ make exec erl -pa $PWD/ebin edit $PWD/deps/*/ebin -boot start_sasl \ -sname ux \ -s ux \ - -s reloader +# -s reloader diff --git a/src/ux_break_tests.erl b/test/ux_break_tests.erl similarity index 100% rename from src/ux_break_tests.erl rename to test/ux_break_tests.erl diff --git a/src/ux_char_tests.erl b/test/ux_char_tests.erl similarity index 100% rename from src/ux_char_tests.erl rename to test/ux_char_tests.erl diff --git a/src/ux_string_tests.erl b/test/ux_string_tests.erl similarity index 99% rename from src/ux_string_tests.erl rename to test/ux_string_tests.erl index b5aa850..58684c2 100644 --- a/src/ux_string_tests.erl +++ b/test/ux_string_tests.erl @@ -1,5 +1,4 @@ -module(ux_string_tests). --include("ux.hrl"). %% %% Tests diff --git a/src/ux_tests.hrl b/test/ux_tests.hrl similarity index 86% rename from src/ux_tests.hrl rename to test/ux_tests.hrl index 322a4e6..931cad5 100644 --- a/src/ux_tests.hrl +++ b/test/ux_tests.hrl @@ -1,3 +1,5 @@ +-include("../src/ux.hrl"). + -define(TO(X), {'timeout', 60, X}). -define(_testTO(X), ?TO(?_test(X))). diff --git a/src/ux_uca_tests.erl b/test/ux_uca_tests.erl similarity index 99% rename from src/ux_uca_tests.erl rename to test/ux_uca_tests.erl index a73a5c7..77c7fc7 100644 --- a/src/ux_uca_tests.erl +++ b/test/ux_uca_tests.erl @@ -1,6 +1,5 @@ %%% @private -module(ux_uca_tests). --include("ux.hrl").