From 2b6f1b59eaf243f5e173ba684a982342d97c5948 Mon Sep 17 00:00:00 2001
From: Bryan O'Sullivan
Date: Wed, 15 Sep 2010 04:33:40 +0000
Subject: [PATCH] Combining class, and digit conversion

--HG--
extra : convert_revision : f801550777325b50474f1ff21cc32572db3bc631
---
 Data/Text/ICU/Char.hs | 31 +++++++++++++++++++++++++++++++
 cbits/text_icu.c      | 11 +++++++++++
 include/hs_text_icu.h |  2 ++
 3 files changed, 44 insertions(+)

diff --git a/Data/Text/ICU/Char.hs b/Data/Text/ICU/Char.hs
index a44e354..0a1e69b 100644
--- a/Data/Text/ICU/Char.hs
+++ b/Data/Text/ICU/Char.hs
@@ -17,14 +17,18 @@ module Data.Text.ICU.Char
     , Direction(..)
     -- * Functions
     , blockCode
+    , combiningClass
+    , digitToInt
     , direction
     , isMirrored
     , mirror
     ) where
 
 import Data.Char (chr, ord)
+import Data.Int (Int32)
 import Data.Text.ICU.Internal (UBool, UChar32, asBool)
 import Data.Typeable (Typeable)
+import Data.Word (Word8)
 import Foreign.C.Types (CInt)
 
 -- | The language directional property of a character set.
@@ -264,6 +268,27 @@ mirror :: Char -> Char
 mirror = chr . fromIntegral . u_charMirror . fromIntegral . ord
 {-# INLINE mirror #-}
 
+-- | Return the combining class of a character, as reported by ICU.
+combiningClass :: Char -> Int
+combiningClass = fromIntegral . u_getCombiningClass . fromIntegral . ord
+{-# INLINE combiningClass #-}
+
+-- | Return the decimal digit value of a decimal digit character.
+-- Such characters have the general category @Nd@ (decimal digit
+-- numbers) and a @Numeric_Type@ of @Decimal@.
+--
+-- No digit values are returned for any Han characters, because Han
+-- number characters are often used with a special Chinese-style
+-- number format (with characters for powers of 10 in between) instead
+-- of in decimal-positional notation. Unicode 4 explicitly assigns
+-- Han number characters a @Numeric_Type@ of @Numeric@ instead of
+-- @Decimal@.
+digitToInt :: Char -> Maybe Int
+digitToInt c
+    | i == -1   = Nothing
+    | otherwise = Just $! fromIntegral i
+  where i = u_charDigitValue . fromIntegral . ord $ c
+
 type UBlockCode = CInt
 
 type UCharDirection = CInt
@@ -278,3 +303,9 @@ foreign import ccall unsafe "hs_text_icu.h __hs_u_isMirrored" u_isMirrored
 
 foreign import ccall unsafe "hs_text_icu.h __hs_u_charMirror" u_charMirror
     :: UChar32 -> UChar32
+
+foreign import ccall unsafe "hs_text_icu.h __hs_u_getCombiningClass" u_getCombiningClass
+    :: UChar32 -> Word8
+
+foreign import ccall unsafe "hs_text_icu.h __hs_u_charDigitValue" u_charDigitValue
+    :: UChar32 -> Int32
diff --git a/cbits/text_icu.c b/cbits/text_icu.c
index 458db1e..b546af6 100644
--- a/cbits/text_icu.c
+++ b/cbits/text_icu.c
@@ -295,3 +295,14 @@ UChar32 __hs_u_charMirror(UChar32 c)
 {
     return u_charMirror(c);
 }
+
+uint8_t __hs_u_getCombiningClass(UChar32 c)
+{
+    return u_getCombiningClass(c);
+}
+
+int32_t __hs_u_charDigitValue(UChar32 c)
+{
+    return u_charDigitValue(c);
+}
+
diff --git a/include/hs_text_icu.h b/include/hs_text_icu.h
index 12990d9..f9e1ad5 100644
--- a/include/hs_text_icu.h
+++ b/include/hs_text_icu.h
@@ -36,6 +36,8 @@ UBlockCode __hs_ublock_getCode(UChar32 c);
 UCharDirection __hs_u_charDirection(UChar32 c);
 UBool __hs_u_isMirrored(UChar32 c);
 UChar32 __hs_u_charMirror(UChar32 c);
+uint8_t __hs_u_getCombiningClass(UChar32 c);
+int32_t __hs_u_charDigitValue(UChar32 c);
 
 /* ucol.h */