diff --git a/Cargo.toml b/Cargo.toml index 6afe3b62..fda4f636 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,15 +2,16 @@ members = [ "rust_icu", "rust_icu_common", + "rust_icu_intl", "rust_icu_sys", "rust_icu_ucal", + "rust_icu_ucol", "rust_icu_udat", "rust_icu_udata", "rust_icu_uenum", "rust_icu_uloc", + "rust_icu_umsg", "rust_icu_ustring", "rust_icu_utext", - "rust_icu_umsg", - "rust_icu_intl", ] diff --git a/Makefile b/Makefile index 400906a2..2790840d 100644 --- a/Makefile +++ b/Makefile @@ -96,6 +96,7 @@ publish: $(call publish,rust_icu_ucal) $(call publish,rust_icu_udat) $(call publish,rust_icu_udata) + $(call publish,rust_icu_ucol) $(call publish,rust_icu_umsg) $(call publish,rust_icu) @@ -124,6 +125,7 @@ uprev: $(call uprev,rust_icu_udata) $(call uprev,rust_icu_umsg) $(call uprev,rust_icu_intl) + $(call uprev,rust_icu_ucol) $(call uprev,rust_icu) cov: diff --git a/README.md b/README.md index ae58ecb2..b3eaff00 100644 --- a/README.md +++ b/README.md @@ -49,14 +49,15 @@ coverage in the headers. | [rust_icu_common](https://crates.io/crates/rust_icu_common)| Commonly used low-level wrappings of the bindings. | | [rust_icu_intl](https://crates.io/crates/rust_icu_intl)| Implements ECMA 402 recommendation APIs. | | [rust_icu_sys](https://crates.io/crates/rust_icu_sys)| Low-level bindings code | -| [rust_icu_ucal](https://crates.io/crates/rust_icu_ucal)| Implements [`ucal.h`](https://unicode-org.github.io/icu-docs/apidoc/released/icu4c/ucal_8h.html) C API header from the ICU library. | -| [rust_icu_udat](https://crates.io/crates/rust_icu_udat)| Implements [`udat.h`](https://unicode-org.github.io/icu-docs/apidoc/released/icu4c/udat_8h.html) C API header from the ICU library. | -| [rust_icu_udata](https://crates.io/crates/rust_icu_udata)| Implements [`udata.h`](https://unicode-org.github.io/icu-docs/apidoc/released/icu4c/udata_8h.html) C API header from the ICU library. 
| -| [rust_icu_uenum](https://crates.io/crates/rust_icu_uenum)| Implements [`uenum.h`](https://unicode-org.github.io/icu-docs/apidoc/released/icu4c/uenum_8h.html) C API header from the ICU library. Mainly `UEnumeration` and friends. | -| [rust_icu_uloc](https://crates.io/crates/rust_icu_uloc)| Implements [`uloc.h`](https://unicode-org.github.io/icu-docs/apidoc/released/icu4c/uloc_8h.html) C API header from the ICU library. | -| [rust_icu_ustring](https://crates.io/crates/rust_icu_ustring)| Implements [`ustring.h`]() C API header from the ICU library. | -| [rust_icu_utext](https://crates.io/crates/rust_icu_utext)| Implements [`utext.h`](https://unicode-org.github.io/icu-docs/apidoc/released/icu4c/utext_8h.html) C API header from the ICU library. | -| [rust_icu_umsg](https://crates.io/crates/rust_icu_umsg)| Implements [`umsg.h`](https://unicode-org.github.io/icu-docs/apidoc/released/icu4c/umsg_8h.html) C API header from the ICU library. | +| [rust_icu_ucal](https://crates.io/crates/rust_icu_ucal)| ICU Calendar. Implements [`ucal.h`](https://unicode-org.github.io/icu-docs/apidoc/released/icu4c/ucal_8h.html) C API header from the ICU library. | +| [rust_icu_ucol](https://crates.io/crates/rust_icu_ucol)| Collation support. Implements [`ucol.h`](https://unicode-org.github.io/icu-docs/apidoc/released/icu4c/ucol_8h.html) C API header from the ICU library. | +| [rust_icu_udat](https://crates.io/crates/rust_icu_udat)| ICU date and time. Implements [`udat.h`](https://unicode-org.github.io/icu-docs/apidoc/released/icu4c/udat_8h.html) C API header from the ICU library. | +| [rust_icu_udata](https://crates.io/crates/rust_icu_udata)| ICU binary data. Implements [`udata.h`](https://unicode-org.github.io/icu-docs/apidoc/released/icu4c/udata_8h.html) C API header from the ICU library. | +| [rust_icu_uenum](https://crates.io/crates/rust_icu_uenum)| ICU enumerations. 
Implements [`uenum.h`](https://unicode-org.github.io/icu-docs/apidoc/released/icu4c/uenum_8h.html) C API header from the ICU library. Mainly `UEnumeration` and friends. | +| [rust_icu_uloc](https://crates.io/crates/rust_icu_uloc)| Locale support. Implements [`uloc.h`](https://unicode-org.github.io/icu-docs/apidoc/released/icu4c/uloc_8h.html) C API header from the ICU library. | +| [rust_icu_umsg](https://crates.io/crates/rust_icu_umsg)| MessageFormat support. Implements [`umsg.h`](https://unicode-org.github.io/icu-docs/apidoc/released/icu4c/umsg_8h.html) C API header from the ICU library. | +| [rust_icu_ustring](https://crates.io/crates/rust_icu_ustring)| ICU strings. Implements [`ustring.h`](https://unicode-org.github.io/icu-docs/apidoc/released/icu4c/ustring_8h.html) C API header from the ICU library. | +| [rust_icu_utext](https://crates.io/crates/rust_icu_utext)| Text operations. Implements [`utext.h`](https://unicode-org.github.io/icu-docs/apidoc/released/icu4c/utext_8h.html) C API header from the ICU library. | # Limitations diff --git a/build/showprogress.sh b/build/showprogress.sh index b8c720be..f715b949 100755 --- a/build/showprogress.sh +++ b/build/showprogress.sh @@ -9,6 +9,7 @@ cd $TOP_DIR C_API_HEADER_NAMES=( "ucal" + "ucol" "udat" "udata" "uenum" diff --git a/coverage/report.md b/coverage/report.md index bf9bfc2c..d62bba9f 100644 --- a/coverage/report.md +++ b/coverage/report.md @@ -2,7 +2,8 @@ | Header | Implemented | | ------ | ----------- | -| `ucal.h` | 15 / 46 | +| `ucal.h` | 15 / 45 | +| `ucol.h` | 2 / 50 | | `udat.h` | 6 / 38 | | `udata.h` | 2 / 8 | | `uenum.h` | 8 / 8 | @@ -46,7 +47,6 @@ | `ucal_getDSTSavings` | | | `ucal_getFieldDifference` | | | `ucal_getGregorianChange` | | -| `ucal_getHostTimeZone` | | | `ucal_getKeywordValuesForLocale` | | | `ucal_getLimit` | | | `ucal_getLocaleByType` | | @@ -65,6 +65,61 @@ | `ucal_setGregorianChange` | | | `ucal_setTimeZone` | | +# Header: `ucol.h` + +| Unimplemented | Implemented | +| ------------- | ----------- | +| | `ucol_close` | +| | `ucol_strcoll` | +| `ucol_cloneBinary`
| | +| `ucol_countAvailable` | | +| `ucol_equal` | | +| `ucol_getAttribute` | | +| `ucol_getAvailable` | | +| `ucol_getBound` | | +| `ucol_getContractions` | | +| `ucol_getContractionsAndExpansions` | | +| `ucol_getDisplayName` | | +| `ucol_getEquivalentReorderCodes` | | +| `ucol_getFunctionalEquivalent` | | +| `ucol_getKeywords` | | +| `ucol_getKeywordValues` | | +| `ucol_getKeywordValuesForLocale` | | +| `ucol_getLocale` | | +| `ucol_getLocaleByType` | | +| `ucol_getMaxVariable` | | +| `ucol_getReorderCodes` | | +| `ucol_getRules` | | +| `ucol_getRulesEx` | | +| `ucol_getShortDefinitionString` | | +| `ucol_getSortKey` | | +| `ucol_getStrength` | | +| `ucol_getTailoredSet` | | +| `ucol_getUCAVersion` | | +| `ucol_getUnsafeSet` | | +| `ucol_getVariableTop` | | +| `ucol_getVersion` | | +| `ucol_greater` | | +| `ucol_greaterOrEqual` | | +| `ucol_mergeSortkeys` | | +| `ucol_nextSortKeyPart` | | +| `ucol_normalizeShortDefinitionString` | | +| `ucol_open` | | +| `ucol_openAvailableLocales` | | +| `ucol_openBinary` | | +| `ucol_openFromShortString` | | +| `ucol_openRules` | | +| `ucol_prepareShortStringOpen` | | +| `ucol_restoreVariableTop` | | +| `ucol_safeClone` | | +| `ucol_setAttribute` | | +| `ucol_setMaxVariable` | | +| `ucol_setReorderCodes` | | +| `ucol_setStrength` | | +| `ucol_setVariableTop` | | +| `ucol_strcollIter` | | +| `ucol_strcollUTF8` | | + # Header: `udat.h` | Unimplemented | Implemented | diff --git a/coverage/ucal_all.txt b/coverage/ucal_all.txt index 124f1336..9b93ddd4 100644 --- a/coverage/ucal_all.txt +++ b/coverage/ucal_all.txt @@ -14,7 +14,6 @@ ucal_getDefaultTimeZone ucal_getDSTSavings ucal_getFieldDifference ucal_getGregorianChange -ucal_getHostTimeZone ucal_getKeywordValuesForLocale ucal_getLimit ucal_getLocaleByType diff --git a/coverage/ucol_all.txt b/coverage/ucol_all.txt new file mode 100644 index 00000000..6859c8d1 --- /dev/null +++ b/coverage/ucol_all.txt @@ -0,0 +1,50 @@ +ucol_cloneBinary +ucol_close +ucol_countAvailable +ucol_equal 
+ucol_getAttribute +ucol_getAvailable +ucol_getBound +ucol_getContractions +ucol_getContractionsAndExpansions +ucol_getDisplayName +ucol_getEquivalentReorderCodes +ucol_getFunctionalEquivalent +ucol_getKeywords +ucol_getKeywordValues +ucol_getKeywordValuesForLocale +ucol_getLocale +ucol_getLocaleByType +ucol_getMaxVariable +ucol_getReorderCodes +ucol_getRules +ucol_getRulesEx +ucol_getShortDefinitionString +ucol_getSortKey +ucol_getStrength +ucol_getTailoredSet +ucol_getUCAVersion +ucol_getUnsafeSet +ucol_getVariableTop +ucol_getVersion +ucol_greater +ucol_greaterOrEqual +ucol_mergeSortkeys +ucol_nextSortKeyPart +ucol_normalizeShortDefinitionString +ucol_open +ucol_openAvailableLocales +ucol_openBinary +ucol_openFromShortString +ucol_openRules +ucol_prepareShortStringOpen +ucol_restoreVariableTop +ucol_safeClone +ucol_setAttribute +ucol_setMaxVariable +ucol_setReorderCodes +ucol_setStrength +ucol_setVariableTop +ucol_strcoll +ucol_strcollIter +ucol_strcollUTF8 diff --git a/coverage/ucol_implemented.txt b/coverage/ucol_implemented.txt new file mode 100644 index 00000000..2c5f67d7 --- /dev/null +++ b/coverage/ucol_implemented.txt @@ -0,0 +1,2 @@ +ucol_close +ucol_strcoll diff --git a/rust_icu/Cargo.toml b/rust_icu/Cargo.toml index 0b716e43..4ba7e2e0 100644 --- a/rust_icu/Cargo.toml +++ b/rust_icu/Cargo.toml @@ -25,6 +25,7 @@ rust_icu_udat = { path = "../rust_icu_udat", version = "0.1.4", default-features rust_icu_udata = { path = "../rust_icu_udata", version = "0.1.4", default-features = false } rust_icu_uenum = { path = "../rust_icu_uenum", version = "0.1.4", default-features = false } rust_icu_uloc = { path = "../rust_icu_uloc", version = "0.1.4", default-features = false } +rust_icu_ucol = { path = "../rust_icu_ucol", version = "0.1.4", default-features = false } rust_icu_umsg = { path = "../rust_icu_umsg", version = "0.1.4", default-features = false } rust_icu_ustring = { path = "../rust_icu_ustring", version = "0.1.4", default-features = false } rust_icu_utext = 
{ path = "../rust_icu_utext", version = "0.1.4", default-features = false } @@ -38,6 +39,7 @@ use-bindgen = [ "rust_icu_common/use-bindgen", "rust_icu_sys/use-bindgen", "rust_icu_ucal/use-bindgen", + "rust_icu_ucol/use-bindgen", "rust_icu_udat/use-bindgen", "rust_icu_udata/use-bindgen", "rust_icu_uenum/use-bindgen", @@ -50,6 +52,7 @@ renaming = [ "rust_icu_common/renaming", "rust_icu_sys/renaming", "rust_icu_ucal/renaming", + "rust_icu_ucol/renaming", "rust_icu_udat/renaming", "rust_icu_udata/renaming", "rust_icu_uenum/renaming", @@ -62,6 +65,7 @@ icu_config = [ "rust_icu_common/icu_config", "rust_icu_sys/icu_config", "rust_icu_ucal/icu_config", + "rust_icu_ucol/icu_config", "rust_icu_udat/icu_config", "rust_icu_udata/icu_config", "rust_icu_uenum/icu_config", @@ -74,6 +78,7 @@ icu_version_in_env = [ "rust_icu_common/icu_version_in_env", "rust_icu_sys/icu_version_in_env", "rust_icu_ucal/icu_version_in_env", + "rust_icu_ucol/icu_version_in_env", "rust_icu_udat/icu_version_in_env", "rust_icu_udata/icu_version_in_env", "rust_icu_uenum/icu_version_in_env", diff --git a/rust_icu/src/lib.rs b/rust_icu/src/lib.rs index 707f2d02..b849daac 100644 --- a/rust_icu/src/lib.rs +++ b/rust_icu/src/lib.rs @@ -27,22 +27,26 @@ //! //! | Original | Remapped | //! | -------- | -------- | -//! | rust_icu_sys | icu::sys | //! | rust_icu_common | icu::common | +//! | rust_icu_sys | icu::sys | //! | rust_icu_ucal | icu::cal | +//! | rust_icu_ucol | icu::col | //! | rust_icu_udat | icu::dat | //! | rust_icu_udata | icu::data | //! | rust_icu_uenum | icu::enums | +//! | rust_icu_uloc | icu::loc | //! | rust_icu_umsg | icu::msg | //! | rust_icu_ustring | icu::string | //! 
| rust_icu_utext | text | -pub use rust_icu_sys as sys; pub use rust_icu_common as common; +pub use rust_icu_sys as sys; pub use rust_icu_ucal as cal; +pub use rust_icu_ucol as col; pub use rust_icu_udat as dat; pub use rust_icu_udata as data; pub use rust_icu_uenum as enums; +pub use rust_icu_uloc as loc; pub use rust_icu_umsg as msg; pub use rust_icu_ustring as string; pub use rust_icu_utext as text; diff --git a/rust_icu_sys/build.rs b/rust_icu_sys/build.rs index 7e1771fe..c7ee80ea 100644 --- a/rust_icu_sys/build.rs +++ b/rust_icu_sys/build.rs @@ -36,7 +36,7 @@ lazy_static! { // headers. Any of these will fail if the required binaries are not present in $PATH. static ref BINDGEN_SOURCE_MODULES: Vec<&'static str> = vec![ "ucal", "udat", "udata", "uenum", "ustring", "utext", "uclean", "umsg", - "ucol", + "ucol", "uset", ]; // C functions that will be made available to rust code. Add more to this list if you want to @@ -50,6 +50,7 @@ lazy_static! { "uloc_.*", "utext_.*", "umsg_.*", + "ucol_.*", ]; // C types that will be made available to rust code. Add more to this list if you want to @@ -67,6 +68,9 @@ lazy_static! { "UMessageFormat", "UParseError", "UText", + "UCollator", + "USet", + "UCol.*", ]; } diff --git a/rust_icu_ucol/Cargo.toml b/rust_icu_ucol/Cargo.toml new file mode 100644 index 00000000..8999a323 --- /dev/null +++ b/rust_icu_ucol/Cargo.toml @@ -0,0 +1,69 @@ +[package] +authors = ["Google Inc."] +edition = "2018" +license = "Apache-2.0" +name = "rust_icu_ucol" +readme = "README.md" +repository = "https://github.com/google/rust_icu" +version = "0.1.4" +default-features = false +keywords = ["icu", "unicode", "i18n", "l10n"] + +description = """ +Native bindings to the ICU4C library from Unicode. 
+ +- ucol.h: Collation support +""" + +[dependencies] +log = "0.4.6" +paste = "0.1.5" +rust_icu_common = { path = "../rust_icu_common", version = "0.1.4", default-features = false } +rust_icu_sys = { path = "../rust_icu_sys", version = "0.1.4", default-features = false } +rust_icu_uenum = { path = "../rust_icu_uenum", version = "0.1.4", default-features = false } +rust_icu_ustring = { path = "../rust_icu_ustring", version = "0.1.4", default-features = false } +anyhow = "1.0.25" + +[dev-dependencies] +anyhow = "1.0.25" + +# See the feature description in ../rust_icu_sys/Cargo.toml for details. +[features] +default = ["use-bindgen", "renaming", "icu_config"] + +use-bindgen = [ + "rust_icu_common/use-bindgen", + "rust_icu_sys/use-bindgen", + "rust_icu_uenum/use-bindgen", + "rust_icu_ustring/use-bindgen", +] +renaming = [ + "rust_icu_common/renaming", + "rust_icu_sys/renaming", + "rust_icu_uenum/renaming", + "rust_icu_ustring/renaming", +] +icu_config = [ + "rust_icu_common/icu_config", + "rust_icu_sys/icu_config", + "rust_icu_uenum/icu_config", + "rust_icu_ustring/icu_config", +] +icu_version_in_env = [ + "rust_icu_common/icu_version_in_env", + "rust_icu_sys/icu_version_in_env", + "rust_icu_uenum/icu_version_in_env", + "rust_icu_ustring/icu_version_in_env", +] +icu_version_64_plus = [] +icu_version_67_plus = [] + +[build-dependencies] +anyhow = "1.0" +bindgen = "0.53.2" + +[badges] +maintenance = { status = "actively-developed" } +is-it-maintained-issue-resolution = { repository = "google/rust_icu" } +is-it-maintained-open-issues = { repository = "google/rust_icu" } +travis-ci = { repository = "google/rust_icu", branch = "master" } diff --git a/rust_icu_ucol/README.md b/rust_icu_ucol/README.md new file mode 120000 index 00000000..32d46ee8 --- /dev/null +++ b/rust_icu_ucol/README.md @@ -0,0 +1 @@ +../README.md \ No newline at end of file diff --git a/rust_icu_ucol/src/lib.rs b/rust_icu_ucol/src/lib.rs new file mode 100644 index 00000000..c3b75af9 --- /dev/null +++ 
b/rust_icu_ucol/src/lib.rs
@@ -0,0 +1,230 @@
+// Copyright 2020 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//! # ICU collation support for rust
+//!
+//! This crate provides [collation](https://en.wikipedia.org/wiki/Unicode_collation_algorithm)
+//! (locale-sensitive string ordering), based on the collation as implemented by the ICU library.
+//! Specifically the functionality exposed through its C API, as available in the [header
+//! `ucol.h`](https://unicode-org.github.io/icu-docs/apidoc/released/icu4c/ucol_8h.html).
+//!
+//! The main type is [UCollator], which can be created using `UCollator::try_from` from a `&str`.
+//!
+//! A detailed discussion of collation is out of scope of source code documentation. An interested
+//! reader can check out the [collation documentation on the ICU user
+//! guide](http://userguide.icu-project.org/collation).
+//!
+//! Are you missing some features from this crate? Consider [reporting an
+//! issue](https://github.com/google/rust_icu/issues) or even [contributing the
+//! functionality](https://github.com/google/rust_icu/pulls).
+//!
+//! ## Examples
+//!
+//! Some example code for the use of collation is given below.
+//!
+//! First off, the more low-level API, which uses [ustring::UChar] is the following, which requires
+//! a conversion to [ustring::UChar] prior to use. This function is mostly used in algorithms that
+//! compose Unicode functionality.
+//!
+//! ```
+//! use rust_icu_ustring as ustring;
+//! use rust_icu_ucol as ucol;
+//! use std::convert::TryFrom;
+//! let collator = ucol::UCollator::try_from("sr-Latn").expect("collator");
+//! let mut mixed_up = vec!["d", "dž", "đ", "a", "b", "c", "č", "ć"];
+//! mixed_up.sort_by(|a, b| {
+//!     let first = ustring::UChar::try_from(*a).expect("first");
+//!     let second = ustring::UChar::try_from(*b).expect("second");
+//!     collator.strcoll(&first, &second)
+//! });
+//! let alphabet = vec!["a", "b", "c", "č", "ć", "d", "dž", "đ"];
+//! assert_eq!(alphabet, mixed_up);
+//! ```
+//!
+//! A more rustful API is [UCollator::strcoll_utf8] which can operate on rust `AsRef<str>` and can
+//! be used without converting the input data ahead of time.
+//!
+//! ```
+//! use rust_icu_ucol as ucol;
+//! use std::convert::TryFrom;
+//! let collator = ucol::UCollator::try_from("sr-Latn").expect("collator");
+//! let mut mixed_up = vec!["d", "dž", "đ", "a", "b", "c", "č", "ć"];
+//! mixed_up.sort_by(|a, b| collator.strcoll_utf8(a, b).expect("strcoll_utf8"));
+//! let alphabet = vec!["a", "b", "c", "č", "ć", "d", "dž", "đ"];
+//! assert_eq!(alphabet, mixed_up);
+//! ```
+use {
+    rust_icu_common as common, rust_icu_sys as sys,
+    rust_icu_sys::versioned_function,
+    rust_icu_sys::*,
+    rust_icu_ustring as ustring,
+    std::{cmp::Ordering, convert::TryFrom, ffi, os::raw::c_char, ptr},
+};
+
+/// A locale-sensitive string comparator wrapping an ICU collator.
+///
+/// Create one with `UCollator::try_from("<locale>")`. The wrapped ICU collator is closed when
+/// the value is dropped.
+#[derive(Debug)]
+pub struct UCollator {
+    // Pointer to the ICU collator obtained from `ucol_open`; closed by `Drop`.
+    rep: ptr::NonNull<sys::UCollator>,
+}
+
+impl Drop for UCollator {
+    /// Releases the resources taken up by a single collator.
+    ///
+    /// Implements `ucol_close`
+    fn drop(&mut self) {
+        // SAFETY: `rep` is non-null and was obtained from `ucol_open` in `try_from`.
+        unsafe { versioned_function!(ucol_close)(self.rep.as_ptr()) };
+    }
+}
+
+impl TryFrom<&str> for UCollator {
+    type Error = common::Error;
+
+    /// Makes a new collator from the supplied locale, e.g. `en-US`, or
+    /// `de@collation=phonebook`.
+    ///
+    /// Other examples:
+    ///
+    /// * `el-u-kf-upper`
+    /// * `el@colCaseFirst=upper`
+    ///
+    /// Returns an error if `locale` contains an interior NUL byte, or if the status set by
+    /// `ucol_open` does not pass `common::Error::ok_or_warning`.
+    ///
+    /// Implements `ucol_open`
+    fn try_from(locale: &str) -> Result<Self, Self::Error> {
+        let locale_cstr = ffi::CString::new(locale)?;
+        let mut status = common::Error::OK_CODE;
+        // Unsafety note: this is the way to create the collator. We expect all
+        // the passed-in values to be well-formed.
+        let rep = unsafe {
+            assert!(common::Error::is_ok(status));
+            versioned_function!(ucol_open)(locale_cstr.as_ptr(), &mut status) as *mut sys::UCollator
+        };
+        common::Error::ok_or_warning(status)?;
+        Ok(UCollator {
+            // A null result alongside an accepted status would be an ICU invariant violation.
+            rep: ptr::NonNull::new(rep).expect("ucol_open returned a null collator"),
+        })
+    }
+}
+
+impl UCollator {
+    /// Compares strings `first` and `second` according to the collation rules in this collator.
+    ///
+    /// Returns [Ordering::Less] if `first` compares as less than `second`, and for other return
+    /// codes respectively.
+    ///
+    /// Implements `ucol_strcoll`
+    pub fn strcoll(&self, first: &ustring::UChar, second: &ustring::UChar) -> Ordering {
+        let result = unsafe {
+            assert!(first.len() <= std::i32::MAX as usize);
+            assert!(second.len() <= std::i32::MAX as usize);
+            versioned_function!(ucol_strcoll)(
+                self.rep.as_ptr(),
+                first.as_c_ptr(),
+                first.len() as i32,
+                second.as_c_ptr(),
+                second.len() as i32,
+            )
+        };
+        UCollator::to_rust_ordering(result)
+    }
+
+    /// Compares strings `first` and `second` according to the collation rules in this collator.
+    ///
+    /// Returns [Ordering::Less] if `first` compares as less than `second`, and for other return
+    /// codes respectively.
+    ///
+    /// In contrast to [UCollator::strcoll], this function requires no string conversions to
+    /// compare two rust strings.
+    ///
+    /// Implements `ucol_strcollUTF8`
+    pub fn strcoll_utf8(
+        &self,
+        first: impl AsRef<str>,
+        second: impl AsRef<str>,
+    ) -> Result<Ordering, common::Error> {
+        let first = first.as_ref();
+        let second = second.as_ref();
+        assert!(first.len() <= std::i32::MAX as usize);
+        assert!(second.len() <= std::i32::MAX as usize);
+        let mut status = common::Error::OK_CODE;
+        // Unsafety note:
+        // - `str` is always well formed UTF-8 in rust.
+        // - The pointers are cast to `c_char` rather than `i8`, since C `char` is unsigned on
+        //   some targets.
+        let result = unsafe {
+            versioned_function!(ucol_strcollUTF8)(
+                self.rep.as_ptr(),
+                first.as_ptr() as *const c_char,
+                first.len() as i32,
+                second.as_ptr() as *const c_char,
+                second.len() as i32,
+                &mut status,
+            )
+        };
+        common::Error::ok_or_warning(status)?;
+        Ok(UCollator::to_rust_ordering(result))
+    }
+
+    // Converts ICU ordering result type to a Rust ordering result type.
+    fn to_rust_ordering(result: sys::UCollationResult) -> Ordering {
+        match result {
+            sys::UCollationResult::UCOL_LESS => Ordering::Less,
+            sys::UCollationResult::UCOL_GREATER => Ordering::Greater,
+            sys::UCollationResult::UCOL_EQUAL => Ordering::Equal,
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn basic() {
+        let _ = crate::UCollator::try_from("de@collation=phonebook").expect("collator created");
+    }
+
+    #[test]
+    fn strcoll_utf8_test() -> Result<(), common::Error> {
+        let collator = crate::UCollator::try_from("sr-Latn")?;
+        let mut mixed_up = vec!["d", "dž", "đ", "a", "b", "c", "č", "ć"];
+        mixed_up.sort_by(|a, b| collator.strcoll_utf8(a, b).expect("strcoll_utf8"));
+
+        let alphabet = vec!["a", "b", "c", "č", "ć", "d", "dž", "đ"];
+        assert_eq!(alphabet, mixed_up);
+        Ok(())
+    }
+
+    #[test]
+    fn strcoll_test() -> Result<(), common::Error> {
+        let collator = crate::UCollator::try_from("sr-Latn")?;
+        let mut mixed_up = vec!["d", "dž", "đ", "a", "b", "c", "č", "ć"];
+        mixed_up.sort_by(|a, b| {
+            let first = ustring::UChar::try_from(*a).expect("first");
+            let second = ustring::UChar::try_from(*b).expect("second");
+            collator.strcoll(&first, &second)
+        });
+
+        let alphabet = vec!["a", "b", "c", "č", "ć", "d", "dž", "đ"];
+        assert_eq!(alphabet, mixed_up);
+        Ok(())
+    }
+}