Skip to content

Commit

Permalink
Compatibility with haskell/text#365 (needs a close review)
Browse files Browse the repository at this point in the history
  • Loading branch information
jberryman committed Aug 23, 2021
1 parent 610892e commit 806bb14
Show file tree
Hide file tree
Showing 4 changed files with 22 additions and 28 deletions.
1 change: 1 addition & 0 deletions Data/Attoparsec/Internal/Types.hs
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ import Prelude hiding (succ)
import qualified Data.Attoparsec.ByteString.Buffer as B
import qualified Data.Attoparsec.Text.Buffer as T

-- | Position(?) in code units (afaict)
newtype Pos = Pos { fromPos :: Int }
deriving (Eq, Ord, Show, Num)

Expand Down
23 changes: 8 additions & 15 deletions Data/Attoparsec/Text/Buffer.hs
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ module Data.Attoparsec.Text.Buffer
, iter
, iter_
, substring
, dropWord16
, dropWord8
) where

import Control.Exception (assert)
Expand All @@ -46,7 +46,7 @@ import Data.Text ()
import Data.Text.Internal (Text(..))
import Data.Text.Internal.Encoding.Utf16 (chr2)
import Data.Text.Internal.Unsafe.Char (unsafeChr)
import Data.Text.Unsafe (Iter(..))
import Data.Text.Unsafe (Iter(..), iterArray)
import Foreign.Storable (sizeOf)
import GHC.Exts (Int(..), indexIntArray#, unsafeCoerce#, writeIntArray#)
import GHC.ST (ST(..), runST)
Expand Down Expand Up @@ -132,31 +132,24 @@ substring s l (Buf arr off len _ _) =
Text arr (off+s) l
{-# INLINE substring #-}

dropWord16 :: Int -> Buffer -> Text
dropWord16 s (Buf arr off len _ _) =
dropWord8 :: Int -> Buffer -> Text
dropWord8 s (Buf arr off len _ _) =
assert (s >= 0 && s <= len) $
Text arr (off+s) (len-s)
{-# INLINE dropWord16 #-}
{-# INLINE dropWord8 #-}

-- | /O(1)/ Iterate (unsafely) one step forwards through a UTF-16
-- array, returning the current character and the delta to add to give
-- the next offset to iterate at.
iter :: Buffer -> Int -> Iter
iter (Buf arr off _ _ _) i
| m < 0xD800 || m > 0xDBFF = Iter (unsafeChr m) 1
| otherwise = Iter (chr2 m n) 2
where m = A.unsafeIndex arr j
n = A.unsafeIndex arr k
j = off + i
k = j + 1
iter (Buf arr off _ _ _) i = iterArray arr (off+i)
{-# INLINE iter #-}

-- | /O(1)/ Iterate one step through a UTF-16 array, returning the
-- delta to add to give the next offset to iterate at.
iter_ :: Buffer -> Int -> Int
iter_ (Buf arr off _ _ _) i | m < 0xD800 || m > 0xDBFF = 1
| otherwise = 2
where m = A.unsafeIndex arr (off+i)
iter_ b i = case iter b i of
Iter _ l -> l
{-# INLINE iter_ #-}

unsafeThaw :: A.Array -> ST s (A.MArray s)
Expand Down
10 changes: 5 additions & 5 deletions Data/Attoparsec/Text/Internal.hs
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ string_ suspended f s0 = T.Parser $ \t pos more lose succ ->
| T.null ft -> suspended s s t pos more lose succ
| otherwise -> lose t pos more [] "string"
Just (pfx,ssfx,tsfx)
| T.null ssfx -> let l = Pos (T.lengthWord16 pfx)
| T.null ssfx -> let l = Pos (T.lengthWord8 pfx)
in succ t (pos + l) more (substring pos l t)
| not (T.null tsfx) -> lose t pos more [] "string"
| otherwise -> suspended s ssfx t pos more lose succ
Expand All @@ -195,7 +195,7 @@ stringSuspended f s000 s0 t0 pos0 more0 lose0 succ0 =
in case T.commonPrefixes s0 s of
Nothing -> lose t pos more [] "string"
Just (_pfx,ssfx,tsfx)
| T.null ssfx -> let l = Pos (T.lengthWord16 s000)
| T.null ssfx -> let l = Pos (T.lengthWord8 s000)
in succ t (pos + l) more (substring pos l t)
| T.null tsfx -> stringSuspended f s000 ssfx t pos more lose succ
| otherwise -> lose t pos more [] "string"
Expand Down Expand Up @@ -445,12 +445,12 @@ endOfLine = (char '\n' >> return ()) <|> (string "\r\n" >> return ())

-- | Terminal failure continuation.
failK :: Failure a
failK t (Pos pos) _more stack msg = Fail (Buf.dropWord16 pos t) stack msg
failK t (Pos pos) _more stack msg = Fail (Buf.dropWord8 pos t) stack msg
{-# INLINE failK #-}

-- | Terminal success continuation.
successK :: Success a a
successK t (Pos pos) _more a = Done (Buf.dropWord16 pos t) a
successK t (Pos pos) _more a = Done (Buf.dropWord8 pos t) a
{-# INLINE successK #-}

-- | Run a parser.
Expand All @@ -477,7 +477,7 @@ parseOnly m s = case runParser m (buffer s) 0 Complete failK successK of

get :: Parser Text
get = T.Parser $ \t pos more _lose succ ->
succ t pos more (Buf.dropWord16 (fromPos pos) t)
succ t pos more (Buf.dropWord8 (fromPos pos) t)
{-# INLINE get #-}

endOfChunk :: Parser Bool
Expand Down
16 changes: 8 additions & 8 deletions tests/QC/Buffer.hs
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ b_length :: BPB -> Property
b_length (BP _ts t buf) = B.length t === BB.length buf

t_length :: BPT -> Property
t_length (BP _ts t buf) = T.lengthWord16 t === BT.length buf
t_length (BP _ts t buf) = T.lengthWord8 t === BT.length buf

b_unsafeIndex :: BPB -> Gen Property
b_unsafeIndex (BP _ts t buf) = do
Expand All @@ -61,14 +61,14 @@ b_unsafeIndex (BP _ts t buf) = do

t_iter :: BPT -> Gen Property
t_iter (BP _ts t buf) = do
let l = T.lengthWord16 t
let l = T.lengthWord8 t
i <- choose (0,l-1)
let it (T.Iter c q) = (c,q)
return $ l === 0 .||. it (T.iter t i) === it (BT.iter buf i)

t_iter_ :: BPT -> Gen Property
t_iter_ (BP _ts t buf) = do
let l = T.lengthWord16 t
let l = T.lengthWord8 t
i <- choose (0,l-1)
return $ l === 0 .||. T.iter_ t i === BT.iter_ buf i

Expand All @@ -77,10 +77,10 @@ b_unsafeDrop (BP _ts t buf) = do
i <- choose (0, B.length t)
return $ B.unsafeDrop i t === BB.unsafeDrop i buf

t_dropWord16 :: BPT -> Gen Property
t_dropWord16 (BP _ts t buf) = do
i <- choose (0, T.lengthWord16 t)
return $ T.dropWord16 i t === BT.dropWord16 i buf
t_dropWord8 :: BPT -> Gen Property
t_dropWord8 (BP _ts t buf) = do
i <- choose (0, T.lengthWord8 t)
return $ T.dropWord8 i t === BT.dropWord8 i buf

tests :: [TestTree]
tests = [
Expand All @@ -92,5 +92,5 @@ tests = [
, testProperty "t_iter" t_iter
, testProperty "t_iter_" t_iter_
, testProperty "b_unsafeDrop" b_unsafeDrop
, testProperty "t_dropWord16" t_dropWord16
, testProperty "t_dropWord8" t_dropWord8
]

0 comments on commit 806bb14

Please sign in to comment.