Skip to content

Commit

Permalink
Merge pull request #22 from Bodigrim/master
Browse files Browse the repository at this point in the history
Use raw bitmaps
  • Loading branch information
harendra-kumar committed Apr 28, 2020
2 parents 3b9cb95 + 1eb7b8f commit 95b2e91
Show file tree
Hide file tree
Showing 7 changed files with 70 additions and 79 deletions.
27 changes: 0 additions & 27 deletions Data/Unicode/Properties/BitArray.hs

This file was deleted.

19 changes: 10 additions & 9 deletions Data/Unicode/Properties/CombiningClass.hs

Large diffs are not rendered by default.

17 changes: 9 additions & 8 deletions Data/Unicode/Properties/Decomposable.hs

Large diffs are not rendered by default.

17 changes: 9 additions & 8 deletions Data/Unicode/Properties/DecomposableK.hs

Large diffs are not rendered by default.

5 changes: 2 additions & 3 deletions Data/Unicode/Properties/Decompose.hs
Expand Up @@ -17,7 +17,6 @@ module Data.Unicode.Properties.Decompose
)
where

import Data.BitArray (BitArray, lookupBit)
import Data.Char (ord)

import qualified Data.Unicode.Properties.Decomposable as D
Expand Down Expand Up @@ -46,7 +45,7 @@ decomposeMax DecomposeNFD = D.decomposeMax
decomposeMax DecomposeNFKD = K.decomposeMax

{-# INLINE decomposeBitmap #-}
decomposeBitmap :: DecomposeMode -> BitArray
decomposeBitmap :: DecomposeMode -> Int -> Bool
decomposeBitmap DecomposeNFD = D.decomposeBitmap
decomposeBitmap DecomposeNFKD = K.decomposeBitmap

Expand All @@ -62,7 +61,7 @@ data DecomposeResult = FalseA | FalseB | FalseC | TrueA
isDecomposable :: DecomposeMode -> Char -> DecomposeResult
isDecomposable mode c | (ord c) < decomposeMin mode = FalseA
isDecomposable mode c | (ord c) <= decomposeMax mode =
case lookupBit (decomposeBitmap mode) (ord c) of
case decomposeBitmap mode (ord c) of
True -> TrueA
False -> FalseB
isDecomposable _ _ = FalseC
62 changes: 40 additions & 22 deletions unicode-data/UCD2Haskell.hs
Expand Up @@ -23,9 +23,11 @@ import Prelude hiding (pred)
import Control.DeepSeq (NFData (..), deepseq)
import Control.Exception
import Data.Binary as Bin
import Data.Bits (shiftL)
import qualified Data.ByteString.Lazy as L
import Data.Char (chr)
import Data.Char (ord)
import Data.List (unfoldr)
import Data.Map ((!))
import qualified Data.Map as M
import Data.Monoid ((<>))
Expand Down Expand Up @@ -104,34 +106,49 @@ genMinMax prefix ordList = unlines
]

genBitmap :: String -> [Int] -> String
genBitmap prefix ordList =
-- On ARM, compilation fails with llvm optimizer crashing when one big list
-- is used. Split it into two to avoid the problem.
let l = length ordList
mn = minimum ordList
mx = maximum ordList
(ordList1, ordList2) = splitAt (div l 2) ordList
in unlines
[ "bitList1, bitList2 :: [Int]"
, "bitList1 = " ++ show ordList1
, "bitList2 = " ++ show ordList2
, ""
, prefix <> "Bitmap :: BitArray"
, prefix <> "Bitmap = bitArraySetBits "
++ (show (mn, mx))
++ " $ bitList1 ++ bitList2"
]
genBitmap prefix ordList = unlines
[ prefix <> "Bitmap :: Int -> Bool"
, prefix <> "Bitmap i@(I# i#) = W# (indexWordOffAddr# addr# (i# `iShiftRL#` logFbs#)) `testBit` (i .&. (fbs - 1))"
, " where"
, " fbs = finiteBitSize (0 :: Word)"
, " logFbs# = case countTrailingZeros fbs of I# l# -> l#"
, " addr# = " ++ show (bitMapToAddrLiteral (positionsToBitMap ordList)) ++ "#"
]

positionsToBitMap :: [Int] -> [Bool]
positionsToBitMap = go 0
where
go _ [] = []
go i xxs@(x : xs)
| i < x = False : go (i + 1) xxs
| otherwise = True : go (i + 1) xs

bitMapToAddrLiteral :: [Bool] -> String
bitMapToAddrLiteral = map (chr . toByte . padTo8) . unfoldr go
where
go :: [a] -> Maybe ([a], [a])
go [] = Nothing
go xs = Just (take 8 xs, drop 8 xs)

padTo8 :: [Bool] -> [Bool]
padTo8 xs
| length xs >= 8 = xs
| otherwise = xs ++ replicate (8 - length xs) False

toByte :: [Bool] -> Int
toByte xs = sum $ map (\i -> if xs !! i then 1 `shiftL` i else 0) [0..7]

genCombiningClass :: PropertiesDB -> String -> String
genCombiningClass props file = unlines
[ "-- autogenerated from Unicode data"
, "{-# LANGUAGE MagicHash #-}"
, "module Data.Unicode.Properties." <> file
, "(getCombiningClass, isCombining)"
, "where"
, ""
, "import Data.Bits ((.&.), testBit, finiteBitSize, countTrailingZeros)"
, "import Data.Char (ord)"
, "import Data.BitArray (BitArray, lookupBit)"
, "import Data.Unicode.Properties.BitArray (bitArraySetBits)"
, "import GHC.Exts (Int(..), Word(..), iShiftRL#, indexWordOffAddr#)"
, ""
, "getCombiningClass :: Char -> Int"
, concat $ map genCombiningClassDef ccmap
Expand All @@ -140,7 +157,7 @@ genCombiningClass props file = unlines
, "{-# INLINE isCombining #-}"
, genSignature "isCombining"
, genRangeCheck "isCombining" ordList
, "isCombining c = lookupBit combiningBitmap (ord c)"
, "isCombining c = combiningBitmap (ord c)"
, genBitmap "combining" ordList
]
where
Expand All @@ -166,12 +183,13 @@ decompositions dtype =
genDecomposable :: DType -> PropertiesDB -> String -> String
genDecomposable dtype props file = unlines
[ "-- autogenerated from Unicode data"
, "{-# LANGUAGE MagicHash #-}"
, "module Data.Unicode.Properties." <> file
, "(decomposeBitmap, decomposeMax, decomposeMin)"
, "where"
, ""
, "import Data.BitArray (BitArray)"
, "import Data.Unicode.Properties.BitArray (bitArraySetBits)"
, "import Data.Bits ((.&.), testBit, finiteBitSize, countTrailingZeros)"
, "import GHC.Exts (Int(..), Word(..), iShiftRL#, indexWordOffAddr#)"
, ""
, genMinMax "decompose" ordList
, genBitmap "decompose" ordList
Expand Down
2 changes: 0 additions & 2 deletions unicode-transforms.cabal
Expand Up @@ -67,7 +67,6 @@ library
Data.Unicode.Types
other-modules:
Data.Unicode.Internal.NormalizeStream
Data.Unicode.Properties.BitArray
Data.Unicode.Properties.CombiningClass
Data.Unicode.Properties.Compositions
Data.Unicode.Properties.Decomposable
Expand All @@ -82,7 +81,6 @@ library
ghc-options: -Wall -fwarn-identities -fwarn-incomplete-record-updates -fwarn-incomplete-uni-patterns -fwarn-tabs
build-depends:
base >=4.7 && <5
, bitarray >=0.0.1 && <0.1
, bytestring >=0.9 && <0.11
, text >=1.1.1 && <1.3
if flag(dev)
Expand Down

0 comments on commit 95b2e91

Please sign in to comment.