forked from purescript-contrib/purescript-unicode
/
Unicode.purs
76 lines (63 loc) · 2.75 KB
/
Unicode.purs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
module Data.String.Unicode
( toUpper
, toLower
, caseFold
, caselessMatch
, toUpperSimple
, toLowerSimple
, caseFoldSimple
) where
import Prelude
import Control.Bind (bindFlipped)
import Data.CodePoint.Unicode as CP
import Data.String (CodePoint, fromCodePointArray, toCodePointArray)
-- Full Unicode conversions
-- | Uppercase a string using the full, locale-independent Unicode mapping.
-- |
-- | Every code point is replaced by its uppercase sequence, which can be
-- | longer than one code point, so the result may contain more code points
-- | than the input. For example, `toUpper "ß" == "SS"`.
-- |
-- | This consults more rules than `toUpperSimple` and may therefore be
-- | slower, but its results are more correct.
toUpper :: String -> String
toUpper str = convertFull CP.toUpper str
-- | Lowercase a string using the full, locale-independent Unicode mapping.
-- |
-- | Every code point is replaced by its lowercase sequence, which can be
-- | longer than one code point, so the result may contain more code points
-- | than the input. For example, `toLower "\x0130" == "\x0069\x0307"`.
-- |
-- | This consults more rules than `toLowerSimple` and may therefore be
-- | slower, but its results are more correct.
toLower :: String -> String
toLower str = convertFull CP.toLower str
-- | Apply full Unicode case folding to a string.
-- |
-- | A single code point may fold to a longer sequence of code points, so the
-- | result can be longer than the input.
caseFold :: String -> String
caseFold str = convertFull CP.caseFold str
-- | Test whether two strings are equal when case is ignored.
-- |
-- | Both arguments are passed through `caseFold` and the folded strings are
-- | compared for equality; no other transformation is applied here.
caselessMatch :: String -> String -> Boolean
caselessMatch left right = eq (caseFold left) (caseFold right)
-- Simple code-point-to-code-point conversion algorithms
-- | Uppercase a string one code point at a time.
-- |
-- | Each code point maps to exactly one uppercase code point, so the number
-- | of code points in the string is preserved.
-- |
-- | Note: this is not the full Unicode algorithm, see `toUpper`.
toUpperSimple :: String -> String
toUpperSimple str = convert CP.toUpperSimple str
-- | Lowercase a string one code point at a time.
-- |
-- | Each code point maps to exactly one lowercase code point, so the number
-- | of code points in the string is preserved.
-- |
-- | Note: this is not the full Unicode algorithm, see `toLower`.
toLowerSimple :: String -> String
toLowerSimple str = convert CP.toLowerSimple str
-- | Case-fold a string one code point at a time.
-- |
-- | Each code point maps to exactly one case-folded code point, so the number
-- | of code points in the string is preserved.
-- |
-- | Note: this is not the full Unicode algorithm, see `caseFold`.
caseFoldSimple :: String -> String
caseFoldSimple str = convert CP.caseFoldSimple str
-- Helper functions
-- Lift a one-to-one code point mapping to whole strings: split the string
-- into its code points, apply `f` to each, and build a string from the
-- mapped code points.
convert :: (CodePoint -> CodePoint) -> String -> String
convert f str = fromCodePointArray (map f (toCodePointArray str))
-- Lift a one-to-many code point mapping to whole strings: split the string
-- into its code points, expand each one with `f`, and build a string from
-- the combined array that `bindFlipped` produces.
convertFull :: (CodePoint -> Array CodePoint) -> String -> String
convertFull f str = fromCodePointArray (bindFlipped f (toCodePointArray str))