-
Notifications
You must be signed in to change notification settings - Fork 158
/
Fusion.hs
231 lines (207 loc) · 8.03 KB
/
Fusion.hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
{-# LANGUAGE BangPatterns, MagicHash #-}
-- |
-- Module : Data.Text.Fusion
-- Copyright : (c) Tom Harper 2008-2009,
-- (c) Bryan O'Sullivan 2009-2010,
-- (c) Duncan Coutts 2009
--
-- License : BSD-style
-- Maintainer : bos@serpentine.com, rtomharper@googlemail.com,
-- duncan@haskell.org
-- Stability : experimental
-- Portability : GHC
--
-- Text manipulation functions represented as fusible operations over
-- streams.
module Data.Text.Fusion
(
-- * Types
Stream(..)
, Step(..)
-- * Creation and elimination
, stream
, unstream
, reverseStream
, length
-- * Transformations
, reverse
-- * Construction
-- ** Scans
, reverseScanr
-- ** Accumulating maps
, mapAccumL
-- ** Generation and unfolding
, unfoldrN
-- * Indexing
, index
, findIndex
, countChar
) where
import Prelude (Bool(..), Char, Maybe(..), Monad(..), Int,
Num(..), Ord(..), ($), (&&),
fromIntegral, otherwise)
import Data.Bits ((.&.))
import Data.Text.Internal (Text(..))
import Data.Text.Private (runText)
import Data.Text.UnsafeChar (ord, unsafeChr, unsafeWrite)
import Data.Text.UnsafeShift (shiftL, shiftR)
import qualified Data.Text.Array as A
import qualified Data.Text.Fusion.Common as S
import Data.Text.Fusion.Internal
import Data.Text.Fusion.Size
import qualified Data.Text.Internal as I
import qualified Data.Text.Encoding.Utf16 as U16
default(Int)
-- | /O(n)/ Convert a 'Text' into a 'Stream Char'.
stream :: Text -> Stream Char
stream (Text arr off len) = Stream next off (maxSize len)
where
!end = off+len
next !i
| i >= end = Done
| n >= 0xD800 && n <= 0xDBFF = Yield (U16.chr2 n n2) (i + 2)
| otherwise = Yield (unsafeChr n) (i + 1)
where
n = A.unsafeIndex arr i
n2 = A.unsafeIndex arr (i + 1)
{-# INLINE [0] stream #-}
-- | /O(n)/ Convert a 'Text' into a 'Stream Char', but iterate
-- backwards.
reverseStream :: Text -> Stream Char
reverseStream (Text arr off len) = Stream next (off+len-1) (maxSize len)
where
{-# INLINE next #-}
next !i
| i < off = Done
| n >= 0xDC00 && n <= 0xDFFF = Yield (U16.chr2 n2 n) (i - 2)
| otherwise = Yield (unsafeChr n) (i - 1)
where
n = A.unsafeIndex arr i
n2 = A.unsafeIndex arr (i - 1)
{-# INLINE [0] reverseStream #-}
-- | /O(n)/ Convert a 'Stream Char' into a 'Text'.
unstream :: Stream Char -> Text
unstream (Stream next0 s0 len) = runText $ \done -> do
let mlen = upperBound 4 len
arr0 <- A.new mlen
let outer arr top = loop
where
loop !s !i =
case next0 s of
Done -> done arr i
Skip s' -> loop s' i
Yield x s'
| j >= top -> {-# SCC "unstream/resize" #-} do
let top' = (top + 1) `shiftL` 1
arr' <- A.new top'
A.copyM arr' 0 arr 0 top
outer arr' top' s i
| otherwise -> do d <- unsafeWrite arr i x
loop s' (i+d)
where j | ord x < 0x10000 = i
| otherwise = i + 1
outer arr0 mlen s0 0
{-# INLINE [0] unstream #-}
{-# RULES "STREAM stream/unstream fusion" forall s. stream (unstream s) = s #-}
-- ----------------------------------------------------------------------------
-- * Basic stream functions
length :: Stream Char -> Int
length = S.lengthI
{-# INLINE[0] length #-}
-- | /O(n)/ Reverse the characters of a string.
reverse :: Stream Char -> Text
reverse (Stream next s len0)
| isEmpty len0 = I.empty
| otherwise = I.textP arr off' len'
where
len0' = upperBound 4 (larger len0 4)
(arr, (off', len')) = A.run2 (A.new len0' >>= loop s (len0'-1) len0')
loop !s0 !i !len marr =
case next s0 of
Done -> return (marr, (j, len-j))
where j = i + 1
Skip s1 -> loop s1 i len marr
Yield x s1 | i < least -> {-# SCC "reverse/resize" #-} do
let newLen = len `shiftL` 1
marr' <- A.new newLen
A.copyM marr' (newLen-len) marr 0 len
write s1 (len+i) newLen marr'
| otherwise -> write s1 i len marr
where n = ord x
least | n < 0x10000 = 0
| otherwise = 1
m = n - 0x10000
lo = fromIntegral $ (m `shiftR` 10) + 0xD800
hi = fromIntegral $ (m .&. 0x3FF) + 0xDC00
write t j l mar
| n < 0x10000 = do
A.unsafeWrite mar j (fromIntegral n)
loop t (j-1) l mar
| otherwise = do
A.unsafeWrite mar (j-1) lo
A.unsafeWrite mar j hi
loop t (j-2) l mar
{-# INLINE [0] reverse #-}
-- | /O(n)/ Perform the equivalent of 'scanr' over a list, only with
-- the input and result reversed.
reverseScanr :: (Char -> Char -> Char) -> Char -> Stream Char -> Stream Char
reverseScanr f z0 (Stream next0 s0 len) = Stream next (S1 :*: z0 :*: s0) (len+1) -- HINT maybe too low
where
{-# INLINE next #-}
next (S1 :*: z :*: s) = Yield z (S2 :*: z :*: s)
next (S2 :*: z :*: s) = case next0 s of
Yield x s' -> let !x' = f x z
in Yield x' (S2 :*: x' :*: s')
Skip s' -> Skip (S2 :*: z :*: s')
Done -> Done
{-# INLINE reverseScanr #-}
-- | /O(n)/ Like 'unfoldr', 'unfoldrN' builds a stream from a seed
-- value. However, the length of the result is limited by the
-- first argument to 'unfoldrN'. This function is more efficient than
-- 'unfoldr' when the length of the result is known.
unfoldrN :: Int -> (a -> Maybe (Char,a)) -> a -> Stream Char
unfoldrN n = S.unfoldrNI n
{-# INLINE [0] unfoldrN #-}
-------------------------------------------------------------------------------
-- ** Indexing streams
-- | /O(n)/ stream index (subscript) operator, starting from 0.
index :: Stream Char -> Int -> Char
index = S.indexI
{-# INLINE [0] index #-}
-- | The 'findIndex' function takes a predicate and a stream and
-- returns the index of the first element in the stream
-- satisfying the predicate.
findIndex :: (Char -> Bool) -> Stream Char -> Maybe Int
findIndex = S.findIndexI
{-# INLINE [0] findIndex #-}
-- | /O(n)/ The 'count' function returns the number of times the query
-- element appears in the given stream.
countChar :: Char -> Stream Char -> Int
countChar = S.countCharI
{-# INLINE [0] countChar #-}
-- | /O(n)/ Like a combination of 'map' and 'foldl''. Applies a
-- function to each element of a 'Text', passing an accumulating
-- parameter from left to right, and returns a final 'Text'.
mapAccumL :: (a -> Char -> (a,Char)) -> a -> Stream Char -> (a, Text)
mapAccumL f z0 (Stream next0 s0 len) = (nz,I.textP na 0 nl)
where
(na,(nz,nl)) = A.run2 (A.new mlen >>= \arr -> outer arr mlen z0 s0 0)
where mlen = upperBound 4 len
outer arr top = loop
where
loop !z !s !i =
case next0 s of
Done -> return (arr, (z,i))
Skip s' -> loop z s' i
Yield x s'
| j >= top -> {-# SCC "mapAccumL/resize" #-} do
let top' = (top + 1) `shiftL` 1
arr' <- A.new top'
A.copyM arr' 0 arr 0 top
outer arr' top' z s i
| otherwise -> do let (z',c) = f z x
d <- unsafeWrite arr i c
loop z' s' (i+d)
where j | ord x < 0x10000 = i
| otherwise = i + 1
{-# INLINE [0] mapAccumL #-}