From 11ce68a55511b0af4dae2983a8e98cada22c03c3 Mon Sep 17 00:00:00 2001 From: ekmett Date: Sat, 3 Jul 2010 13:27:29 -0700 Subject: [PATCH] repository initialized --- Data/CharSet.hs | 567 ++++++++++++++++++++++++++++++++++++++++++++++++ LICENSE | 31 +++ Setup.lhs | 8 + charset.cabal | 23 ++ 4 files changed, 629 insertions(+) create mode 100644 Data/CharSet.hs create mode 100644 LICENSE create mode 100755 Setup.lhs create mode 100644 charset.cabal diff --git a/Data/CharSet.hs b/Data/CharSet.hs new file mode 100644 index 0000000..a858c54 --- /dev/null +++ b/Data/CharSet.hs @@ -0,0 +1,567 @@ +{-# LANGUAGE BangPatterns, CPP #-} +----------------------------------------------------------------------------- +-- | +-- Module : Data.CharSet +-- Copyright : (c) Edward Kmett 2010 +-- License : BSD3 +-- Maintainer : ekmett@gmail.com +-- Stability : experimental +-- Portability : portable +-- +-- Encode unicode character sets as arbitrary precision floating point values +-- using the least character in the set as the exponent. Can efficiently represent +-- reasonably tightly grouped character sets, but may use up to 139KiB to represent +-- a particularly sparse set. +-- +-- Designed to be imported qualified: +-- +-- > import Data.CharSet (CharSet) +-- > import qualified Data.CharSet as CharSet +------------------------------------------------------------------------------- + +module Data.CharSet + ( + -- * CharSet + CharSet + , build + -- * Manipulation + , empty + , singleton + , full + , union + , intersection + , complement + , insert + , delete + , (\\) + , fromList + , fromDistinctAscList + , toArray + -- * Accessors + , null + , size + , member + , elem + , notElem + , isComplemented + , toInteger + -- * Builtins + -- ** POSIX + , posixAscii + -- ** Unicode + , UnicodeCategory(..) 
+ , unicodeCategories + -- ** Data.Char classifiers + , control, space, lower, upper, alpha, alphaNum + , print, digit, octDigit, letter, mark, number + , punctuation, symbol, separator, ascii, latin1, asciiUpper, asciiLower + ) where + +import Data.Array hiding (range) +import qualified Data.Bits as Bits +import Data.Bits hiding (complement) +import Data.Char +import Data.Data +import Data.Function (on) +import Data.Map (Map) +import qualified Data.Map as Map +import Data.Monoid (Monoid(..)) +import Prelude hiding (null, exponent, toInteger, elem, notElem, print, pi) +import Text.Read + +data CharSet = CS + { _countAtLeast :: {-# UNPACK #-} !Int -- ^ A conservative upper bound on the element count. + -- If negative, we are complemented with respect to the universe + , _countAtMost :: {-# UNPACK #-} !Int -- ^ A conservative lower bound on the element count. + -- If negative, we are complemented with respect to the universe + , _count :: Int -- ^ Lazy element count used when the above two disagree. O(1) environment size + , exponent :: {-# UNPACK #-} !Int -- ^ Low water mark. index of the least element potentially in the set. + , _hwm :: {-# UNPACK #-} !Int -- ^ High water mark. index of the greatest element potentially in the set. + , mantissa :: {-# UNPACK #-} !Integer -- ^ the set of bits starting from the exponent. + -- if negative, then we are complemented with respect to universe + } + + +ul, uh :: Char +ul = minBound +uh = maxBound +{-# INLINE ul #-} +{-# INLINE uh #-} + +ol, oh :: Int +ol = fromEnum ul +oh = fromEnum uh +{-# INLINE ol #-} +{-# INLINE oh #-} + +-- | Internal smart constructor. Forces count whenever it is pigeonholed. 
bs :: Int -> Int -> Int -> Int -> Int -> Integer -> CharSet
bs !lo !hi cnt !l !h !m
  | lo == hi  = CS lo lo lo l h m   -- both bounds agree, so the count is known: store it eagerly
  | otherwise = CS lo hi cnt l h m  -- bounds disagree: keep the lazily computed count
{-# INLINE bs #-}

-- | /O(d)/ where /d/ is absolute deviation in fromEnum over the set.
-- Produces the members in ascending order, each exactly once.
toList :: CharSet -> String
toList (CS _ _ _ l h m)
  -- Complemented: everything strictly below the exponent and strictly above
  -- the high water mark is a member; only the window [l..h] needs testing.
  -- Both outer ranges are empty when the window touches the universe bounds.
  | m < 0     = map toEnum [ol .. l - 1] ++ window (map toEnum [h + 1 .. oh])
  | otherwise = window []
  where
    -- members found in [l..h], followed by @rest@
    window :: String -> String
    window rest = foldr keep rest [l .. h]

    keep :: Int -> String -> String
    keep n acc
      | testBit m (n - l) = toEnum n : acc
      | otherwise         = acc
{-# INLINE toList #-}

-- | /O(1)/ The empty set. Permits /O(1)/ null and size.
empty :: CharSet
empty = CS 0 0 0 0 0 0
{-# INLINE empty #-}

-- | /O(1)/ Construct a @CharSet@ with a single element. Permits /O(1)/ null and size
singleton :: Char -> CharSet
singleton x = CS 1 1 1 e e 1
  where
    e = fromEnum x  -- exponent and high water mark coincide; bit 0 stands for @x@
{-# INLINE singleton #-}

-- | /O(1|d)/ Is the 'CharSet' empty? May be faster than checking if @'size' == 0@ after union.
-- Operations that require a recount are noted.
null :: CharSet -> Bool
null (CS a b c _ _ _)
  | a > 0     = False   -- first bound already positive
  | b == 0    = True    -- second bound is zero
  | otherwise = c == 0  -- bounds are inconclusive: consult the lazy count
{-# INLINE null #-}

-- | /O(1|d)/ The number of elements in the set.
--
-- For a complemented set the stored counts are the bitwise complement of the
-- number of /excluded/ characters (see 'complement'), so the size is the size
-- of the universe minus that number.
size :: CharSet -> Int
size (CS a b c _ _ m)
  | m >= 0    = stored
  | otherwise = oh - ol + 1 - Bits.complement stored
  where
    -- only force the lazy count when the two bounds disagree
    stored = if a == b then a else c
{-# INLINE size #-}

-- | /O(1)/ A 'CharSet' containing every 'Char'; represented as the complement of 'empty'.
full :: CharSet
full = complement empty
{-# INLINE full #-}

-- | /O(d)/ Complements a 'CharSet' with respect to the full range of 'Char'.
-- The two bounds swap places and every count, as well as the mantissa, is
-- bitwise complemented; exponent and high water mark are kept.
complement :: CharSet -> CharSet
complement (CS a b c l h m) =
  CS (Bits.complement b) (Bits.complement a) (Bits.complement c) l h (Bits.complement m)
{-# INLINE complement #-}

-- | /O(d * n)/ Make a 'CharSet' from a list of items.
fromList :: String -> CharSet
fromList = foldr insert empty
{-# INLINE fromList #-}

-- | /O(d * n)/ Make a 'CharSet' from a distinct ascending list of items.
-- The precondition (strictly ascending input) is not checked.
fromDistinctAscList :: String -> CharSet
fromDistinctAscList [] = empty
fromDistinctAscList (c:cs) = go cs 1 l 1
  where
    l = fromEnum c  -- the first (least) element fixes the exponent

    -- remaining input, element count, high water mark, mantissa.
    -- The high water mark starts at the first element so that a
    -- one-element list yields the same set as 'singleton'.
    go :: String -> Int -> Int -> Integer -> CharSet
    go []     !n !h !m = CS n n n l h m
    go (x:xs) !n _  !m = go xs (n + 1) e (setBit m (e - l))
      where
        e = fromEnum x
{-# INLINE fromDistinctAscList #-}

-- | /O(d)/ Insert a single 'Char' into the 'CharSet'. Preserves order of 'null' and 'size'
insert :: Char -> CharSet -> CharSet
insert x r@(CS a b c l h m)
  | b == 0                    = singleton x  -- known empty: anchor the exponent at x instead of 0
  | m < 0 && (e < l || e > h) = r            -- complemented: everything outside the window is already present
  | e < l       = bs (a+1) (b+1) (c+1) e h (shiftL m (l - e) .|. 1)  -- lower the exponent to e
  | e > h       = bs (a+1) (b+1) (c+1) l e (setBit m p)              -- raise the high water mark to e
  | testBit m p = r
  | otherwise   = bs (a+1) (b+1) (c+1) l h (setBit m p)
  where
    e = fromEnum x
    p = e - l  -- bit position of x relative to the exponent
{-# INLINE insert #-}

-- | /O(d)/ Delete a single item from the 'CharSet'. Preserves order of 'null' and 'size'
delete :: Char -> CharSet -> CharSet
delete x r@(CS a b c l h m)
  -- Complemented, below the window: widen the window down to e. The left
  -- shift zero-fills, so the positions between e and the old exponent are
  -- set again (they stay members) and only bit 0, standing for e, is cleared.
  | m < 0 && e < l   = bs (a-1) (b-1) (c-1) e h ((shiftL m (l - e) .|. (bit (l - e) - 1)) `clearBit` 0)
  -- Complemented, above the window: the sign extension already marks
  -- everything up to e as present; clear e and move the high water mark.
  | m < 0 && e > h   = bs (a-1) (b-1) (c-1) l e (clearBit m p)
  | e < l || e > h   = r  -- positive set: nothing lives outside the window
  | testBit m p      = bs (a-1) (b-1) (c-1) l h (clearBit m p)
  | otherwise        = r
  where
    e = fromEnum x
    p = e - l  -- bit position of x relative to the exponent
{-# INLINE delete #-}

-- | /O(1)/ Test for membership in a 'CharSet'
member :: Char -> CharSet -> Bool
member x (CS _ _ _ l h m)
  | e < l || e > h = m < 0  -- outside the window only complemented sets have members
  | otherwise      = testBit m (e - l)
  where
    e = fromEnum x
{-# INLINE member #-}

{-
notMember :: Char -> CharSet -> Bool
notMember x = not . member x
{-# INLINE notMember #-}
-}

-- | /O(1)/ Alias for member
elem :: Char -> CharSet -> Bool
elem = member
{-# INLINE elem #-}

-- | /O(1)/ The negation of 'elem'
notElem :: Char -> CharSet -> Bool
notElem x = not . elem x
{-# INLINE notElem #-}

-- | /O(d)/ convert to an Integer representation: the mantissa shifted left by the exponent.
toInteger :: CharSet -> Integer
toInteger x = mantissa x `shift` exponent x
{-# INLINE toInteger #-}

-- | /O(d)/. May force 'size' to take /O(d)/ if ranges overlap, preserves order of 'null'
union :: CharSet -> CharSet -> CharSet
union x@(CS _ _ _ l _ _) y@(CS _ _ _ l' _ _)
  | l' < l    = union' y x -- ensure left side has lower exponent
  | otherwise = union' x y
{-# INLINE union #-}

-- Worker for 'union'; callers pass the argument with the lower exponent first.
union' :: CharSet -> CharSet -> CharSet
union' x@(CS a b c l h m) y@(CS a' b' c' l' h' m')
  | b == 0            = y    -- fast empty union
  | b' == 0           = x    -- fast empty union
  | a == -1           = full -- fast full union
  | a' == -1          = full -- fast full union
  | m < 0 && m' < 0   = complement (intersection' (complement x) (complement y)) -- appeal to intersection
  | m' < 0            = complement (diff (complement y) x) -- union with complement
  | m < 0             = complement (diff (complement x) y) -- union with complement
  | h < l'            = bs (a + a') (b + b') (c + c') l h' merged -- disjoint positive ranges
  | otherwise         = bs (a `max` a') (b + b') (recount merged) l (h `max` h') merged -- overlapped positives
  where
    -- bring the right mantissa onto the left exponent before merging
    merged = m .|. shiftL m' (l' - l)

-- | /O(1)/ check to see if we are represented as a complemented 'CharSet'.
isComplemented :: CharSet -> Bool
isComplemented cs = mantissa cs < 0
{-# INLINE isComplemented #-}

-- | /O(d)/. May force 'size' and 'null' both to take /O(d)/.
intersection :: CharSet -> CharSet -> CharSet
intersection x@(CS _ _ _ l _ _) y@(CS _ _ _ l' _ _)
  | l' < l    = intersection' y x -- ensure left side has lower exponent
  | otherwise = intersection' x y
{-# INLINE intersection #-}

-- | /O(d)/. May force 'size' and 'null' both to take /O(d)/.
intersection' :: CharSet -> CharSet -> CharSet
intersection' x@(CS a b _ l h m) y@(CS a' b' _ l' h' m')
  | b == 0 || b' == 0  = empty
  | a == -1            = y
  | a' == -1           = x
  | m < 0 && m' < 0    = complement (union' (complement x) (complement y))
  | m' < 0             = diff x (complement y)
  | m < 0              = diff y (complement x)
  | h < l' || h' < l   = empty -- disjoint windows
  | otherwise          = bs 0 (b `min` b') (recount common) base (h `min` h') common
  where
    base   = max l l'
    -- Re-anchor both mantissas at the larger exponent. This is a right
    -- shift: elements below 'base' cannot be in the intersection.
    common = shiftR m (base - l) .&. shiftR m' (base - l')

-- | Unsafe internal method for computing differences
-- preconditions:
-- m >= 0, m' >= 0, a /= -1, a' /= -1, b /= 0, b' /= 0
diff :: CharSet -> CharSet -> CharSet
diff x@(CS a b _ l h m) (CS _ b' _ l' h' m')
  | h < l' || h' < l = x -- disjoint windows: nothing to remove
  | otherwise        = bs (max (a - b') 0) b (recount rest) l h rest
  where
    -- Align the subtrahend with x's exponent first and complement afterwards.
    -- Complementing first and then shifting left would zero-fill the low bits
    -- and wrongly erase elements of x below the subtrahend's exponent.
    rest = m .&. Bits.complement (shift m' (l' - l))

-- | /O(d)/. Preserves order of 'null'. May force /O(d)/ 'size'.
difference :: CharSet -> CharSet -> CharSet
difference x@(CS a b _ _ _ m) y@(CS a' b' _ _ _ m')
  | a == -1          = complement y
  | a' == -1         = empty
  | b == 0           = empty
  | b' == 0          = x
  | m < 0 && m' < 0  = diff (complement y) (complement x)
  | m < 0            = complement (complement x `union` y)
  | m' < 0           = x `intersection` complement y -- removing a complement keeps only what it excludes
  | otherwise        = diff x y

-- | /O(d)/. Preserves order of 'null'. May force /O(d)/ 'size'.
(\\) :: CharSet -> CharSet -> CharSet
(\\) = difference

-- | Internal: the membership bitmap anchored at the least 'Char', independent
-- of exponent and of whether the representation is complemented.
universeBits :: CharSet -> Integer
universeBits (CS _ _ _ l _ m)
  | m >= 0    = shiftL m off
  -- complemented: everything below the exponent is a member, and the sign
  -- extension of the mantissa covers everything above the window
  | otherwise = everything .&. (shiftL m off .|. (bit off - 1))
  where
    off        = l - ol
    everything = bit (oh - ol + 1) - 1

-- | Two sets are equal when they have the same members, however they are represented.
instance Eq CharSet where
  x@(CS _ _ _ l _ m) == y@(CS _ _ _ l' _ m')
    | m >= 0 && m' >= 0 = aligned m m'
    -- compare the (non-negative) excluded sets so the shift cannot zero-fill members
    | m < 0 && m' < 0   = aligned (Bits.complement m) (Bits.complement m')
    | otherwise         = universeBits x == universeBits y
    where
      low = min l l'
      aligned p q = shiftL p (l - low) == shiftL q (l' - low)

-- | Ordered by membership bitmap, so that 'compare' agrees with '=='.
instance Ord CharSet where
  compare = compare `on` universeBits

instance Bounded CharSet where
  minBound = empty
  maxBound = CS n n n ol oh m
    where
      n = oh - ol + 1
      m = bit n - 1 -- one bit per 'Char'

-- | Return a charset based on a character range
range :: Char -> Char -> CharSet
range l h
  | l > h     = empty
  | otherwise = CS n n n lo hi (bit n - 1)
  where
    lo = fromEnum l
    hi = fromEnum h
    n  = hi - lo + 1

-- | /O(d)/ Count the set bits of a mantissa. A negative mantissa yields the
-- bitwise complement of the count of its clear bits.
recount :: Integer -> Int
recount !n
  | n < 0     = Bits.complement (recount (Bits.complement n))
  | otherwise = go 0 0
  where
    top = hwm n

    go :: Int -> Int -> Int
    go !i !acc
      | i > top     = acc
      | testBit n i = go (i + 1) (acc + 1)
      | otherwise   = go (i + 1) acc

-- | /O(d)/. Computes the equivalent of (truncate . logBase 2 . abs) extended with 0 at 0
-- This could be computed faster by directly appealing to GMP, but that is tricky in GHC.
hwm :: Integer -> Int
hwm !n
  | n < 0     = hwm (negate n)
  | n > 1     = scan start (2 * start)
  | otherwise = 0
  where
    start = probe 1

    -- keep doubling i until bit (2*i) exceeds n
    probe :: Int -> Int
    probe !i
      | bit (2 * i) > n = i
      | otherwise       = probe (2 * i)

    -- then binary search the powers for the highest set bit
    scan :: Int -> Int -> Int
    scan !lo !hi
      | lo == hi          = lo
      | bit (mid + 1) > n = scan lo mid
      | otherwise         = scan (mid + 1) hi
      where
        mid = lo + (hi - lo) `div` 2

-- | Tabulate membership for every 'Char'.
toArray :: CharSet -> Array Char Bool
toArray set = listArray (minBound, maxBound) [ c `elem` set | c <- [minBound .. maxBound] ]

instance Show CharSet where
  showsPrec d x@(CS _ _ _ _ _ m)
    | m < 0     = showParen (d > 10) $ showString "complement " . showsPrec 11 (complement x)
    | otherwise = showParen (d > 10) $ showString "fromDistinctAscList " . showsPrec 11 (toList x)


instance Read CharSet where
#ifdef __GLASGOW_HASKELL__
  readPrec = parens $ complemented +++ normal
    where
      complemented = prec 10 $ do
        Ident "complement" <- lexP
        complement `fmap` step readPrec
      normal = prec 10 $ do
        Ident "fromDistinctAscList" <- lexP
        fromDistinctAscList `fmap` step readPrec
#else
  readsPrec d r =
    readParen (d > 10) (\r -> [ (complement m, t)
                              | ("complement", s) <- lex r
                              , (m, t) <- readsPrec 11 s]) r
    ++ readParen (d > 10) (\r -> [ (fromDistinctAscList m, t)
                                 | ("fromDistinctAscList", s) <- lex r
                                 , (m, t) <- readsPrec 11 s]) r
#endif

instance Monoid CharSet where
  mempty = empty
  mappend = union

-- | Build the set of all characters satisfying a predicate by testing every 'Char'.
build :: (Char -> Bool) -> CharSet
build p = fromDistinctAscList $ filter p [minBound .. maxBound]

-- :digit:, etc.
-- | The POSIX character classes restricted to ASCII, keyed by class name without the colons.
posixAscii :: Map String CharSet
posixAscii = Map.fromList
  [ ("alnum", alnum')
  , ("alpha", alpha')
  , ("blank", fromList " \t")
  , ("cntrl", insert '\x7f' $ range '\x00' '\x1f')
  , ("digit", digit')
  , ("graph", range '\x21' '\x7e')
  , ("print", range '\x20' '\x7e')
  , ("word", insert '_' alnum')
  , ("punct", fromList "-!\"#$%&'()*+,./:;<=>?@[\\]^_`{|}~")
  , ("space", fromList " \t\r\n\v\f")
  , ("upper", upper')
  , ("lower", lower')
    -- uses the local ASCII range like every other entry, rather than
    -- scanning the whole universe through 'digit'
  , ("xdigit", digit' `union` range 'a' 'f' `union` range 'A' 'F')
  ]
  where
    lower' = range 'a' 'z'
    upper' = range 'A' 'Z'
    alpha' = lower' `union` upper'
    digit' = range '0' '9'
    alnum' = alpha' `union` digit'

-- | A Unicode general category: long name, abbreviation, the characters it
-- contains, and a human readable description (in that order).
data UnicodeCategory = UnicodeCategory String String CharSet String

-- \p{Letter} or \p{Mc}
-- | The general categories, including the combined ones such as @L@ or @P@.
unicodeCategories :: [UnicodeCategory]
unicodeCategories =
  [ UnicodeCategory "Letter" "L" l "any kind of letter from any language."
  , UnicodeCategory "Lowercase_Letter" "Ll" ll "a lowercase letter that has an uppercase variant"
  , UnicodeCategory "Uppercase_Letter" "Lu" lu "an uppercase letter that has a lowercase variant"
  , UnicodeCategory "Titlecase_Letter" "Lt" lt "a letter that appears at the start of a word when only the first letter of the word is capitalized"
  , UnicodeCategory "Letter&" "L&" la "a letter that exists in lowercase and uppercase variants (combination of Ll, Lu and Lt)"
  , UnicodeCategory "Modifier_Letter" "Lm" lm "a special character that is used like a letter"
  , UnicodeCategory "Other_Letter" "Lo" lo "a letter or ideograph that does not have lowercase and uppercase variants"
  , UnicodeCategory "Mark" "M" m "a character intended to be combined with another character (e.g. accents, umlauts, enclosing boxes, etc.)"
  , UnicodeCategory "Non_Spacing_Mark" "Mn" mn "a character intended to be combined with another character without taking up extra space (e.g. accents, umlauts, etc.)"
  , UnicodeCategory "Spacing_Combining_Mark" "Mc" mc "a character intended to be combined with another character that takes up extra space (vowel signs in many Eastern languages)"
  , UnicodeCategory "Enclosing_Mark" "Me" me "a character that encloses the character it is combined with (circle, square, keycap, etc.)"
  , UnicodeCategory "Separator" "Z" z "any kind of whitespace or invisible separator"
  , UnicodeCategory "Space_Separator" "Zs" zs "a whitespace character that is invisible, but does take up space"
  , UnicodeCategory "Line_Separator" "Zl" zl "line separator character U+2028"
  , UnicodeCategory "Paragraph_Separator" "Zp" zp "paragraph separator character U+2029"
  , UnicodeCategory "Symbol" "S" s "math symbols, currency signs, dingbats, box-drawing characters, etc."
  , UnicodeCategory "Math_Symbol" "Sm" sm "any mathematical symbol"
  , UnicodeCategory "Currency_Symbol" "Sc" sc "any currency sign"
  , UnicodeCategory "Modifier_Symbol" "Sk" sk "a combining character (mark) as a full character on its own"
  , UnicodeCategory "Other_Symbol" "So" so "various symbols that are not math symbols, currency signs, or combining characters"
  , UnicodeCategory "Number" "N" n "any kind of numeric character in any script"
  , UnicodeCategory "Decimal_Digit_Number" "Nd" nd "a digit zero through nine in any script except ideographic scripts"
  , UnicodeCategory "Letter_Number" "Nl" nl "a number that looks like a letter, such as a Roman numeral"
  , UnicodeCategory "Other_Number" "No" no "a superscript or subscript digit, or a number that is not a digit 0..9 (excluding numbers from ideographic scripts)"
  , UnicodeCategory "Punctuation" "P" p "any kind of punctuation character"
  , UnicodeCategory "Dash_Punctuation" "Pd" pd "any kind of hyphen or dash"
  , UnicodeCategory "Open_Punctuation" "Ps" ps "any kind of opening bracket"
  , UnicodeCategory "Close_Punctuation" "Pe" pe "any kind of closing bracket"
  , UnicodeCategory "Initial_Punctuation" "Pi" pi "any kind of opening quote"
  , UnicodeCategory "Final_Punctuation" "Pf" pf "any kind of closing quote"
  , UnicodeCategory "Connector_Punctuation" "Pc" pc "a punctuation character such as an underscore that connects words"
  , UnicodeCategory "Other_Punctuation" "Po" po "any kind of punctuation character that is not a dash, bracket, quote or connector"
  , UnicodeCategory "Other" "C" c "invisible control characters and unused code points"
  , UnicodeCategory "Control" "Cc" cc "an ASCII 0x00..0x1F or Latin-1 0x80..0x9F control character"
  , UnicodeCategory "Format" "Cf" cf "invisible formatting indicator"
  , UnicodeCategory "Private_Use" "Co" co "any code point reserved for private use"
  , UnicodeCategory "Surrogate" "Cs" cs "one half of a surrogate pair in UTF-16 encoding"
  , UnicodeCategory "Unassigned" "Cn" cn "any code point to which no character has been assigned" ]
  where
    -- every character whose 'generalCategory' is exactly the given one
    cat category = build ((category ==) . generalCategory)
    ll = cat LowercaseLetter
    lu = cat UppercaseLetter
    lt = cat TitlecaseLetter
    la = ll `union` lu `union` lt
    lm = cat ModifierLetter
    lo = cat OtherLetter
    l = la `union` lm `union` lo
    mn = cat NonSpacingMark
    mc = cat SpacingCombiningMark
    me = cat EnclosingMark
    m = mn `union` mc `union` me
    zs = cat Space
    zl = cat LineSeparator
    zp = cat ParagraphSeparator
    z = zs `union` zl `union` zp
    sm = cat MathSymbol
    sc = cat CurrencySymbol
    sk = cat ModifierSymbol
    so = cat OtherSymbol
    s = sm `union` sc `union` sk `union` so
    nd = cat DecimalNumber
    nl = cat LetterNumber
    no = cat OtherNumber
    n = nd `union` nl `union` no
    pd = cat DashPunctuation
    ps = cat OpenPunctuation
    pe = cat ClosePunctuation
    pi = cat InitialQuote
    pf = cat FinalQuote
    pc = cat ConnectorPunctuation
    po = cat OtherPunctuation
    p = pd `union` ps `union` pe `union` pi `union` pf `union` pc `union` po
    cc = cat Control
    cf = cat Format
    co = cat PrivateUse
    cs = cat Surrogate
    cn = cat NotAssigned
    c = cc `union` cf `union` co `union` cs `union` cn

-- Haskell character classes from Data.Char
control, space, lower, upper, alpha, alphaNum, print, digit, octDigit, letter, mark, number, punctuation, symbol, separator, ascii, latin1, asciiUpper, asciiLower :: CharSet
control = build isControl
space = build isSpace
lower = build isLower
upper = build isUpper
alpha = build isAlpha
alphaNum = build isAlphaNum
print = build isPrint
digit = build isDigit
octDigit = build isOctDigit
letter = build isLetter
mark = build isMark
number = build isNumber
punctuation = build isPunctuation
symbol = build isSymbol
separator = build isSeparator
ascii = build isAscii
latin1 = build isLatin1
asciiUpper = build isAsciiUpper
asciiLower = build isAsciiLower

instance Typeable CharSet where
+ typeOf _ = mkTyConApp charSetTyCon [] + +charSetTyCon :: TyCon +charSetTyCon = mkTyCon "Data.CharSet.CharSet" +{-# NOINLINE charSetTyCon #-} + +instance Data CharSet where + gfoldl k z set | isComplemented set = z complement `k` complement set + | otherwise = z fromList `k` toList set + toConstr set + | isComplemented set = complementConstr + | otherwise = fromListConstr + + dataTypeOf _ = charSetDataType + + gunfold k z c = case constrIndex c of + 1 -> k (z fromList) + 2 -> k (z complement) + _ -> error "gunfold" + +fromListConstr :: Constr +fromListConstr = mkConstr charSetDataType "fromList" [] Prefix +{-# NOINLINE fromListConstr #-} + +complementConstr :: Constr +complementConstr = mkConstr charSetDataType "complement" [] Prefix +{-# NOINLINE complementConstr #-} + +charSetDataType :: DataType +charSetDataType = mkDataType "Data.CharSet.CharSet" [fromListConstr, complementConstr] +{-# NOINLINE charSetDataType #-} diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..4388fcc --- /dev/null +++ b/LICENSE @@ -0,0 +1,31 @@ +Copyright (c) 2010, Edward Kmett + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + + * Neither the name of Edward Kmett nor the names of other + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/Setup.lhs b/Setup.lhs new file mode 100755 index 0000000..c637830 --- /dev/null +++ b/Setup.lhs @@ -0,0 +1,8 @@ +#!/usr/bin/env runhaskell + +\begin{code} + +import Distribution.Simple +main = defaultMainWithHooks defaultUserHooks + +\end{code} diff --git a/charset.cabal b/charset.cabal new file mode 100644 index 0000000..3b1c646 --- /dev/null +++ b/charset.cabal @@ -0,0 +1,23 @@ +name: charset +version: 0.0 +license: BSD3 +license-File: LICENSE +copyright: (c) Edward Kmett 2010 +author: Edward Kmett +maintainer: ekmett@gmail.com +stability: Experimental +category: Data +homepage: http://github.com/ekmett/charset +synopsis: Fast unicode character sets +description: Fast unicode character sets + +build-type: Simple +build-depends: + base >= 4 && < 5, + containers >= 0.2 && < 0.4, + array >= 0.2 && < 0.4 + +exposed-modules: + Data.CharSet + +GHC-Options: -Wall -fspec-constr -fdicts-cheap -O2