Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Fetching contributors…

Cannot retrieve contributors at this time

853 lines (672 sloc) 30.986 kb
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE ForeignFunctionInterface #-}
{-# LANGUAGE MagicHash #-}
module Data.HashTable.Internal.CacheLine
( cacheLineSearch
, cacheLineSearch2
, cacheLineSearch3
, forwardSearch2
, forwardSearch3
, isCacheLineAligned
, advanceByCacheLineSize
, prefetchRead
, prefetchWrite
, bl_abs#
, sign#
, mask#
, maskw#
) where
import Control.Monad.ST
import Data.HashTable.Internal.IntArray (IntArray)
import qualified Data.HashTable.Internal.IntArray as M
#ifndef NO_C_SEARCH
import Control.Monad
import Foreign.C.Types
#else
import Data.Bits
import Data.Int
import qualified Data.Vector.Unboxed as U
import GHC.Int
#endif
import Data.HashTable.Internal.Utils
import GHC.Exts
{-# INLINE prefetchRead #-}
{-# INLINE prefetchWrite #-}
prefetchRead :: IntArray s -> Int -> ST s ()
prefetchWrite :: IntArray s -> Int -> ST s ()
#ifndef NO_C_SEARCH
foreign import ccall unsafe "lineSearch32"
c_lineSearch32 :: Ptr a -> CInt -> CUInt -> IO CInt
foreign import ccall unsafe "lineSearch64"
c_lineSearch64 :: Ptr a -> CInt -> CULong -> IO CInt
foreign import ccall unsafe "lineSearch32_2"
c_lineSearch32_2 :: Ptr a -> CInt -> CUInt -> CUInt -> IO CInt
foreign import ccall unsafe "lineSearch64_2"
c_lineSearch64_2 :: Ptr a -> CInt -> CULong -> CULong -> IO CInt
foreign import ccall unsafe "lineSearch32_3"
c_lineSearch32_3 :: Ptr a -> CInt -> CUInt -> CUInt -> CUInt -> IO CInt
foreign import ccall unsafe "lineSearch64_3"
c_lineSearch64_3 :: Ptr a -> CInt -> CULong -> CULong -> CULong -> IO CInt
foreign import ccall unsafe "forwardSearch32_2"
c_forwardSearch32_2 :: Ptr a -> CInt -> CInt -> CUInt -> CUInt -> IO CInt
foreign import ccall unsafe "forwardSearch32_3"
c_forwardSearch32_3 :: Ptr a -> CInt -> CInt -> CUInt -> CUInt -> CUInt
-> IO CInt
foreign import ccall unsafe "forwardSearch64_2"
c_forwardSearch64_2 :: Ptr a -> CInt -> CInt -> CULong -> CULong -> IO CInt
foreign import ccall unsafe "forwardSearch64_3"
c_forwardSearch64_3 :: Ptr a -> CInt -> CInt -> CULong -> CULong -> CULong
-> IO CInt
foreign import ccall unsafe "prefetchCacheLine32_read"
prefetchCacheLine32_read :: Ptr a -> CInt -> IO ()
foreign import ccall unsafe "prefetchCacheLine64_read"
prefetchCacheLine64_read :: Ptr a -> CInt -> IO ()
foreign import ccall unsafe "prefetchCacheLine32_write"
prefetchCacheLine32_write :: Ptr a -> CInt -> IO ()
foreign import ccall unsafe "prefetchCacheLine64_write"
prefetchCacheLine64_write :: Ptr a -> CInt -> IO ()
fI :: (Num b, Integral a) => a -> b
fI = fromIntegral
prefetchRead a i = unsafeIOToST c
where
v = M.toPtr a
x = fI i
c32 = prefetchCacheLine32_read v x
c64 = prefetchCacheLine64_read v x
c = if wordSize == 32 then c32 else c64
prefetchWrite a i = unsafeIOToST c
where
v = M.toPtr a
x = fI i
c32 = prefetchCacheLine32_write v x
c64 = prefetchCacheLine64_write v x
c = if wordSize == 32 then c32 else c64
{-# INLINE forwardSearch2 #-}
forwardSearch2 :: IntArray s -> Int -> Int -> Int -> Int -> ST s Int
forwardSearch2 !vec !start !end !x1 !x2 =
liftM fromEnum $! unsafeIOToST c
where
c32 = c_forwardSearch32_2 (M.toPtr vec) (fI start) (fI end) (fI x1) (fI x2)
c64 = c_forwardSearch64_2 (M.toPtr vec) (fI start) (fI end) (fI x1) (fI x2)
c = if wordSize == 32 then c32 else c64
{-# INLINE forwardSearch3 #-}
forwardSearch3 :: IntArray s -> Int -> Int -> Int -> Int -> Int -> ST s Int
forwardSearch3 !vec !start !end !x1 !x2 !x3 =
liftM fromEnum $! unsafeIOToST c
where
c32 = c_forwardSearch32_3 (M.toPtr vec) (fI start) (fI end)
(fI x1) (fI x2) (fI x3)
c64 = c_forwardSearch64_3 (M.toPtr vec) (fI start) (fI end)
(fI x1) (fI x2) (fI x3)
c = if wordSize == 32 then c32 else c64
{-# INLINE lineSearch #-}
lineSearch :: IntArray s -> Int -> Int -> ST s Int
lineSearch !vec !start !value =
liftM fromEnum $! unsafeIOToST c
where
c32 = c_lineSearch32 (M.toPtr vec) (fI start) (fI value)
c64 = c_lineSearch64 (M.toPtr vec) (fI start) (fI value)
c = if wordSize == 32 then c32 else c64
{-# INLINE lineSearch2 #-}
lineSearch2 :: IntArray s -> Int -> Int -> Int -> ST s Int
lineSearch2 !vec !start !x1 !x2 =
liftM fromEnum $! unsafeIOToST c
where
c32 = c_lineSearch32_2 (M.toPtr vec) (fI start) (fI x1) (fI x2)
c64 = c_lineSearch64_2 (M.toPtr vec) (fI start) (fI x1) (fI x2)
c = if wordSize == 32 then c32 else c64
{-# INLINE lineSearch3 #-}
lineSearch3 :: IntArray s -> Int -> Int -> Int -> Int -> ST s Int
lineSearch3 !vec !start !x1 !x2 !x3 =
liftM fromEnum $! unsafeIOToST c
where
c32 = c_lineSearch32_3 (M.toPtr vec) (fI start) (fI x1) (fI x2) (fI x3)
c64 = c_lineSearch64_3 (M.toPtr vec) (fI start) (fI x1) (fI x2) (fI x3)
c = if wordSize == 32 then c32 else c64
#endif
{-# INLINE advanceByCacheLineSize #-}
advanceByCacheLineSize :: Int -> Int -> Int
advanceByCacheLineSize !(I# start0#) !(I# vecSize#) = out
where
!(I# clm#) = cacheLineIntMask
!clmm# = not# (int2Word# clm#)
!start# = word2Int# (clmm# `and#` int2Word# start0#)
!(I# nw#) = numWordsInCacheLine
!start'# = start# +# nw#
!s# = sign# (vecSize# -# start'# -# 1#)
!m# = not# (int2Word# s#)
!r# = int2Word# start'# `and#` m#
!out = I# (word2Int# r#)
{-# INLINE isCacheLineAligned #-}
isCacheLineAligned :: Int -> Bool
isCacheLineAligned (I# x#) = r# ==# 0#
where
!(I# m#) = cacheLineIntMask
!mw# = int2Word# m#
!w# = int2Word# x#
!r# = word2Int# (mw# `and#` w#)
{-# INLINE sign# #-}
-- | Returns 0 if x is positive, -1 otherwise
sign# :: Int# -> Int#
sign# !x# = x# `uncheckedIShiftRA#` wordSizeMinus1#
where
!(I# wordSizeMinus1#) = wordSize-1
{-# INLINE bl_abs# #-}
-- | Abs of an integer, branchless
bl_abs# :: Int# -> Int#
bl_abs# !x# = word2Int# r#
where
!m# = sign# x#
!r# = (int2Word# (m# +# x#)) `xor#` int2Word# m#
{-# INLINE mask# #-}
-- | Returns 0xfff..fff (aka -1) if a# == b#, 0 otherwise.
mask# :: Int# -> Int# -> Int#
mask# !a# !b# = dest#
where
!d# = a# -# b#
!r# = bl_abs# d# -# 1#
!dest# = sign# r#
{- note: this code should be:
mask# :: Int# -> Int# -> Int#
mask# !a# !b# = let !(I# z#) = fromEnum (a# ==# b#)
!q# = negateInt# z#
in q#
but GHC doesn't properly optimize this as straight-line code at the moment.
-}
{-# INLINE maskw# #-}
maskw# :: Int# -> Int# -> Word#
maskw# !a# !b# = int2Word# (mask# a# b#)
#ifdef NO_C_SEARCH
prefetchRead _ _ = return ()
prefetchWrite _ _ = return ()
{-# INLINE forwardSearch2 #-}
forwardSearch2 :: IntArray s -> Int -> Int -> Int -> Int -> ST s Int
forwardSearch2 !vec !start !end !x1 !x2 = go start end False
where
next !i !e !b = let !j = i+1
in if j == e
then (if b then (-1,e,True) else (0,start,True))
else (j,e,b)
go !i !e !b = do
h <- M.readArray vec i
if h == x1 || h == x2
then return i
else do
let (!i',!e',!b') = next i e b
if (i' < 0) then return (-1) else go i' e' b'
{-# INLINE forwardSearch3 #-}
forwardSearch3 :: IntArray s -> Int -> Int -> Int -> Int -> Int -> ST s Int
forwardSearch3 !vec !start !end !x1 !x2 !x3 = go start end False
where
next !i !e !b = let !j = i+1
in if j == e
then (if b then (-1,e,True) else (0,start,True))
else (j,e,b)
go !i !e !b = do
h <- M.readArray vec i
if h == x1 || h == x2 || h == x3
then return i
else do
let (!i',!e',!b') = next i e b
if (i' < 0) then return (-1) else go i' e' b'
deBruijnBitPositions :: U.Vector Int8
deBruijnBitPositions =
U.fromList [
0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
]
{-# INLINE firstBitSet# #-}
-- only works with 32-bit values -- ok for us here
firstBitSet# :: Int# -> Int#
firstBitSet# i# = word2Int# ((or# zeroCase# posw#))
where
!zeroCase# = int2Word# (mask# 0# i#)
!w# = int2Word# i#
!iLowest# = word2Int# (and# w# (int2Word# (negateInt# i#)))
!idxW# = uncheckedShiftRL#
(narrow32Word# (timesWord# (int2Word# iLowest#)
(int2Word# 0x077CB531#)))
27#
!idx = I# (word2Int# idxW#)
!(I8# pos8#) = U.unsafeIndex deBruijnBitPositions idx
!posw# = int2Word# pos8#
#endif
------------------------------------------------------------------------------
-- | Search through a mutable vector for a given int value, cache-line aligned.
-- If the start index is cache-line aligned, and there is more than a
-- cache-line's room between the start index and the end of the vector, we will
-- search the cache-line all at once using an efficient branchless
-- bit-twiddling technique. Otherwise, we will use a typical loop.
--
cacheLineSearch :: IntArray s -- ^ vector to search
-> Int -- ^ start index
-> Int -- ^ value to search for
-> ST s Int -- ^ dest index where it can be found, or
-- \"-1\" if not found
cacheLineSearch !vec !start !value = do
#ifdef NO_C_SEARCH
let !vlen = M.length vec
let !st1 = vlen - start
let !nvlen = numWordsInCacheLine - st1
let adv = (start + cacheLineIntMask) .&. complement cacheLineIntMask
let st2 = adv - start
if nvlen > 0 || not (isCacheLineAligned start)
then naiveSearch vec start (min st1 st2) value
else lineSearch vec start value
#else
lineSearch vec start value
#endif
{-# INLINE cacheLineSearch #-}
#ifdef NO_C_SEARCH
-- | Search through a mutable vector for a given int value. The number of
-- things to search for must be at most the number of things remaining in the
-- vector.
naiveSearch :: IntArray s -- ^ vector to search
-> Int -- ^ start index
-> Int -- ^ number of things to search
-> Int -- ^ value to search for
-> ST s Int -- ^ dest index where it can be found, or
-- \"-1\" if not found
naiveSearch !vec !start !nThings !value = go start
where
!doneIdx = start + nThings
go !i | i >= doneIdx = return (-1)
| otherwise = do
x <- M.readArray vec i
if x == value then return i else go (i+1)
{-# INLINE naiveSearch #-}
lineResult# :: Word# -- ^ mask
-> Int -- ^ start value
-> Int
lineResult# bitmask# (I# start#) = I# (word2Int# rv#)
where
!p# = firstBitSet# (word2Int# bitmask#)
!mm# = maskw# p# (-1#)
!nmm# = not# mm#
!rv# = mm# `or#` (nmm# `and#` (int2Word# (start# +# p#)))
{-# INLINE lineResult# #-}
lineSearch :: IntArray s -- ^ vector to search
-> Int -- ^ start index
-> Int -- ^ value to search for
-> ST s Int -- ^ dest index where it can be found, or
-- \"-1\" if not found
lineSearch | wordSize == 32 = lineSearch32
| otherwise = lineSearch64
{-# INLINE lineSearch #-}
lineSearch64 :: IntArray s -- ^ vector to search
-> Int -- ^ start index
-> Int -- ^ value to search for
-> ST s Int -- ^ dest index where it can be found, or
-- \"-1\" if not found
lineSearch64 !vec !start !(I# v#) = do
(I# x1#) <- M.readArray vec $! start + 0
let !p1# = maskw# x1# v# `and#` int2Word# 0x1#
(I# x2#) <- M.readArray vec $! start + 1
let !p2# = p1# `or#` (maskw# x2# v# `and#` int2Word# 0x2#)
(I# x3#) <- M.readArray vec $! start + 2
let !p3# = p2# `or#` (maskw# x3# v# `and#` int2Word# 0x4#)
(I# x4#) <- M.readArray vec $! start + 3
let !p4# = p3# `or#` (maskw# x4# v# `and#` int2Word# 0x8#)
(I# x5#) <- M.readArray vec $! start + 4
let !p5# = p4# `or#` (maskw# x5# v# `and#` int2Word# 0x10#)
(I# x6#) <- M.readArray vec $! start + 5
let !p6# = p5# `or#` (maskw# x6# v# `and#` int2Word# 0x20#)
(I# x7#) <- M.readArray vec $! start + 6
let !p7# = p6# `or#` (maskw# x7# v# `and#` int2Word# 0x40#)
(I# x8#) <- M.readArray vec $! start + 7
let !p8# = p7# `or#` (maskw# x8# v# `and#` int2Word# 0x80#)
return $! lineResult# p8# start
{-# INLINE lineSearch64 #-}
lineSearch32 :: IntArray s -- ^ vector to search
-> Int -- ^ start index
-> Int -- ^ value to search for
-> ST s Int -- ^ dest index where it can be found, or
-- \"-1\" if not found
lineSearch32 !vec !start !(I# v#) = do
(I# x1#) <- M.readArray vec $! start + 0
let !p1# = maskw# x1# v# `and#` int2Word# 0x1#
(I# x2#) <- M.readArray vec $! start + 1
let !p2# = p1# `or#` (maskw# x2# v# `and#` int2Word# 0x2#)
(I# x3#) <- M.readArray vec $! start + 2
let !p3# = p2# `or#` (maskw# x3# v# `and#` int2Word# 0x4#)
(I# x4#) <- M.readArray vec $! start + 3
let !p4# = p3# `or#` (maskw# x4# v# `and#` int2Word# 0x8#)
(I# x5#) <- M.readArray vec $! start + 4
let !p5# = p4# `or#` (maskw# x5# v# `and#` int2Word# 0x10#)
(I# x6#) <- M.readArray vec $! start + 5
let !p6# = p5# `or#` (maskw# x6# v# `and#` int2Word# 0x20#)
(I# x7#) <- M.readArray vec $! start + 6
let !p7# = p6# `or#` (maskw# x7# v# `and#` int2Word# 0x40#)
(I# x8#) <- M.readArray vec $! start + 7
let !p8# = p7# `or#` (maskw# x8# v# `and#` int2Word# 0x80#)
(I# x9#) <- M.readArray vec $! start + 8
let !p9# = p8# `or#` (maskw# x9# v# `and#` int2Word# 0x100#)
(I# x10#) <- M.readArray vec $! start + 9
let !p10# = p9# `or#` (maskw# x10# v# `and#` int2Word# 0x200#)
(I# x11#) <- M.readArray vec $! start + 10
let !p11# = p10# `or#` (maskw# x11# v# `and#` int2Word# 0x400#)
(I# x12#) <- M.readArray vec $! start + 11
let !p12# = p11# `or#` (maskw# x12# v# `and#` int2Word# 0x800#)
(I# x13#) <- M.readArray vec $! start + 12
let !p13# = p12# `or#` (maskw# x13# v# `and#` int2Word# 0x1000#)
(I# x14#) <- M.readArray vec $! start + 13
let !p14# = p13# `or#` (maskw# x14# v# `and#` int2Word# 0x2000#)
(I# x15#) <- M.readArray vec $! start + 14
let !p15# = p14# `or#` (maskw# x15# v# `and#` int2Word# 0x4000#)
(I# x16#) <- M.readArray vec $! start + 15
let !p16# = p15# `or#` (maskw# x16# v# `and#` int2Word# 0x8000#)
return $! lineResult# p16# start
{-# INLINE lineSearch32 #-}
#endif
------------------------------------------------------------------------------
-- | Search through a mutable vector for one of two given int values,
-- cache-line aligned. If the start index is cache-line aligned, and there is
-- more than a cache-line's room between the start index and the end of the
-- vector, we will search the cache-line all at once using an efficient
-- branchless bit-twiddling technique. Otherwise, we will use a typical loop.
--
cacheLineSearch2 :: IntArray s -- ^ vector to search
-> Int -- ^ start index
-> Int -- ^ value to search for
-> Int -- ^ value 2 to search for
-> ST s Int -- ^ dest index where it can be found, or
-- \"-1\" if not found
cacheLineSearch2 !vec !start !value !value2 = do
#ifdef NO_C_SEARCH
let !vlen = M.length vec
let !st1 = vlen - start
let !nvlen = numWordsInCacheLine - st1
let adv = (start + cacheLineIntMask) .&. complement cacheLineIntMask
let st2 = adv - start
if nvlen > 0 || not (isCacheLineAligned start)
then naiveSearch2 vec start (min st1 st2) value value2
else lineSearch2 vec start value value2
#else
lineSearch2 vec start value value2
#endif
{-# INLINE cacheLineSearch2 #-}
#ifdef NO_C_SEARCH
naiveSearch2 :: IntArray s -- ^ vector to search
-> Int -- ^ start index
-> Int -- ^ number of things to search
-> Int -- ^ value to search for
-> Int -- ^ value 2 to search for
-> ST s Int -- ^ dest index where it can be found, or
-- \"-1\" if not found
naiveSearch2 !vec !start !nThings !value1 !value2 = go start
where
!doneIdx = start + nThings
go !i | i >= doneIdx = return (-1)
| otherwise = do
x <- M.readArray vec i
if x == value1 || x == value2 then return i else go (i+1)
{-# INLINE naiveSearch2 #-}
lineSearch2 :: IntArray s -- ^ vector to search
-> Int -- ^ start index
-> Int -- ^ value to search for
-> Int -- ^ value 2 to search for
-> ST s Int -- ^ dest index where it can be found, or
-- \"-1\" if not found
lineSearch2 | wordSize == 32 = lineSearch32_2
| otherwise = lineSearch64_2
lineSearch64_2 :: IntArray s -- ^ vector to search
-> Int -- ^ start index
-> Int -- ^ value to search for
-> Int -- ^ value 2 to search for
-> ST s Int -- ^ dest index where it can be found, or
-- \"-1\" if not found
lineSearch64_2 !vec !start !(I# v#) !(I# v2#) = do
(I# x1#) <- M.readArray vec $! start + 0
let !p1# = (maskw# x1# v# `or#` maskw# x1# v2#) `and#` int2Word# 0x1#
(I# x2#) <- M.readArray vec $! start + 1
let !p2# = p1# `or#` ((maskw# x2# v# `or#` maskw# x2# v2#)
`and#` int2Word# 0x2#)
(I# x3#) <- M.readArray vec $! start + 2
let !p3# = p2# `or#` ((maskw# x3# v# `or#` maskw# x3# v2#)
`and#` int2Word# 0x4#)
(I# x4#) <- M.readArray vec $! start + 3
let !p4# = p3# `or#` ((maskw# x4# v# `or#` maskw# x4# v2#)
`and#` int2Word# 0x8#)
(I# x5#) <- M.readArray vec $! start + 4
let !p5# = p4# `or#` ((maskw# x5# v# `or#` maskw# x5# v2#)
`and#` int2Word# 0x10#)
(I# x6#) <- M.readArray vec $! start + 5
let !p6# = p5# `or#` ((maskw# x6# v# `or#` maskw# x6# v2#)
`and#` int2Word# 0x20#)
(I# x7#) <- M.readArray vec $! start + 6
let !p7# = p6# `or#` ((maskw# x7# v# `or#` maskw# x7# v2#)
`and#` int2Word# 0x40#)
(I# x8#) <- M.readArray vec $! start + 7
let !p8# = p7# `or#` ((maskw# x8# v# `or#` maskw# x8# v2#)
`and#` int2Word# 0x80#)
return $! lineResult# p8# start
{-# INLINE lineSearch64_2 #-}
lineSearch32_2 :: IntArray s -- ^ vector to search
-> Int -- ^ start index
-> Int -- ^ value to search for
-> Int -- ^ value 2 to search for
-> ST s Int -- ^ dest index where it can be found, or
-- \"-1\" if not found
lineSearch32_2 !vec !start !(I# v#) !(I# v2#) = do
(I# x1#) <- M.readArray vec $! start + 0
let !p1# = (maskw# x1# v# `or#` maskw# x1# v2#) `and#` int2Word# 0x1#
(I# x2#) <- M.readArray vec $! start + 1
let !p2# = p1# `or#` ((maskw# x2# v# `or#` maskw# x2# v2#)
`and#` int2Word# 0x2#)
(I# x3#) <- M.readArray vec $! start + 2
let !p3# = p2# `or#` ((maskw# x3# v# `or#` maskw# x3# v2#)
`and#` int2Word# 0x4#)
(I# x4#) <- M.readArray vec $! start + 3
let !p4# = p3# `or#` ((maskw# x4# v# `or#` maskw# x4# v2#)
`and#` int2Word# 0x8#)
(I# x5#) <- M.readArray vec $! start + 4
let !p5# = p4# `or#` ((maskw# x5# v# `or#` maskw# x5# v2#)
`and#` int2Word# 0x10#)
(I# x6#) <- M.readArray vec $! start + 5
let !p6# = p5# `or#` ((maskw# x6# v# `or#` maskw# x6# v2#)
`and#` int2Word# 0x20#)
(I# x7#) <- M.readArray vec $! start + 6
let !p7# = p6# `or#` ((maskw# x7# v# `or#` maskw# x7# v2#)
`and#` int2Word# 0x40#)
(I# x8#) <- M.readArray vec $! start + 7
let !p8# = p7# `or#` ((maskw# x8# v# `or#` maskw# x8# v2#)
`and#` int2Word# 0x80#)
(I# x9#) <- M.readArray vec $! start + 8
let !p9# = p8# `or#` ((maskw# x9# v# `or#` maskw# x9# v2#)
`and#` int2Word# 0x100#)
(I# x10#) <- M.readArray vec $! start + 9
let !p10# = p9# `or#` ((maskw# x10# v# `or#` maskw# x10# v2#)
`and#` int2Word# 0x200#)
(I# x11#) <- M.readArray vec $! start + 10
let !p11# = p10# `or#` ((maskw# x11# v# `or#` maskw# x11# v2#)
`and#` int2Word# 0x400#)
(I# x12#) <- M.readArray vec $! start + 11
let !p12# = p11# `or#` ((maskw# x12# v# `or#` maskw# x12# v2#)
`and#` int2Word# 0x800#)
(I# x13#) <- M.readArray vec $! start + 12
let !p13# = p12# `or#` ((maskw# x13# v# `or#` maskw# x13# v2#)
`and#` int2Word# 0x1000#)
(I# x14#) <- M.readArray vec $! start + 13
let !p14# = p13# `or#` ((maskw# x14# v# `or#` maskw# x14# v2#)
`and#` int2Word# 0x2000#)
(I# x15#) <- M.readArray vec $! start + 14
let !p15# = p14# `or#` ((maskw# x15# v# `or#` maskw# x15# v2#)
`and#` int2Word# 0x4000#)
(I# x16#) <- M.readArray vec $! start + 15
let !p16# = p15# `or#` ((maskw# x16# v# `or#` maskw# x16# v2#)
`and#` int2Word# 0x8000#)
return $! lineResult# p16# start
{-# INLINE lineSearch32_2 #-}
#endif
------------------------------------------------------------------------------
-- | Search through a mutable vector for one of three given int values,
-- cache-line aligned. If the start index is cache-line aligned, and there is
-- more than a cache-line's room between the start index and the end of the
-- vector, we will search the cache-line all at once using an efficient
-- branchless bit-twiddling technique. Otherwise, we will use a typical loop.
--
cacheLineSearch3 :: IntArray s -- ^ vector to search
-> Int -- ^ start index
-> Int -- ^ value to search for
-> Int -- ^ value 2 to search for
-> Int -- ^ value 3 to search for
-> ST s Int -- ^ dest index where it can be found, or
-- \"-1\" if not found
cacheLineSearch3 !vec !start !value !value2 !value3 = do
#ifdef NO_C_SEARCH
let !vlen = M.length vec
let !st1 = vlen - start
let !nvlen = numWordsInCacheLine - st1
let adv = (start + cacheLineIntMask) .&. complement cacheLineIntMask
let st2 = adv - start
if nvlen > 0 || not (isCacheLineAligned start)
then naiveSearch3 vec start (min st1 st2) value value2 value3
else lineSearch3 vec start value value2 value3
#else
lineSearch3 vec start value value2 value3
#endif
{-# INLINE cacheLineSearch3 #-}
#ifdef NO_C_SEARCH
naiveSearch3 :: IntArray s -- ^ vector to search
-> Int -- ^ start index
-> Int -- ^ number of things to search
-> Int -- ^ value to search for
-> Int -- ^ value 2 to search for
-> Int -- ^ value 3 to search for
-> ST s Int -- ^ dest index where it can be found, or
-- \"-1\" if not found
naiveSearch3 !vec !start !nThings !value1 !value2 !value3 = go start
where
!doneIdx = start + nThings
go !i | i >= doneIdx = return (-1)
| otherwise = do
x <- M.readArray vec i
if x == value1 || x == value2 || x == value3
then return i
else go (i+1)
{-# INLINE naiveSearch3 #-}
lineSearch3 :: IntArray s -- ^ vector to search
-> Int -- ^ start index
-> Int -- ^ value to search for
-> Int -- ^ value 2 to search for
-> Int -- ^ value 3 to search for
-> ST s Int -- ^ dest index where it can be found, or
-- \"-1\" if not found
lineSearch3 | wordSize == 32 = lineSearch32_3
| otherwise = lineSearch64_3
lineSearch64_3 :: IntArray s -- ^ vector to search
-> Int -- ^ start index
-> Int -- ^ value to search for
-> Int -- ^ value 2 to search for
-> Int -- ^ value 3 to search for
-> ST s Int -- ^ dest index where it can be found, or
-- \"-1\" if not found
lineSearch64_3 !vec !start !(I# v#) !(I# v2#) !(I# v3#) = do
(I# x1#) <- M.readArray vec $! start + 0
let !p1# = (maskw# x1# v# `or#` maskw# x1# v2# `or#` maskw# x1# v3#)
`and#` int2Word# 0x1#
(I# x2#) <- M.readArray vec $! start + 1
let !p2# = p1# `or#`
((maskw# x2# v# `or#` maskw# x2# v2# `or#` maskw# x2# v3#)
`and#` int2Word# 0x2#)
(I# x3#) <- M.readArray vec $! start + 2
let !p3# = p2# `or#`
((maskw# x3# v# `or#` maskw# x3# v2# `or#` maskw# x3# v3#)
`and#` int2Word# 0x4#)
(I# x4#) <- M.readArray vec $! start + 3
let !p4# = p3# `or#`
((maskw# x4# v# `or#` maskw# x4# v2# `or#` maskw# x4# v3#)
`and#` int2Word# 0x8#)
(I# x5#) <- M.readArray vec $! start + 4
let !p5# = p4# `or#`
((maskw# x5# v# `or#` maskw# x5# v2# `or#` maskw# x5# v3#)
`and#` int2Word# 0x10#)
(I# x6#) <- M.readArray vec $! start + 5
let !p6# = p5# `or#`
((maskw# x6# v# `or#` maskw# x6# v2# `or#` maskw# x6# v3#)
`and#` int2Word# 0x20#)
(I# x7#) <- M.readArray vec $! start + 6
let !p7# = p6# `or#`
((maskw# x7# v# `or#` maskw# x7# v2# `or#` maskw# x7# v3#)
`and#` int2Word# 0x40#)
(I# x8#) <- M.readArray vec $! start + 7
let !p8# = p7# `or#`
((maskw# x8# v# `or#` maskw# x8# v2# `or#` maskw# x8# v3#)
`and#` int2Word# 0x80#)
return $! lineResult# p8# start
{-# INLINE lineSearch64_3 #-}
lineSearch32_3 :: IntArray s -- ^ vector to search
-> Int -- ^ start index
-> Int -- ^ value to search for
-> Int -- ^ value 2 to search for
-> Int -- ^ value 3 to search for
-> ST s Int -- ^ dest index where it can be found, or
-- \"-1\" if not found
lineSearch32_3 !vec !start !(I# v#) !(I# v2#) !(I# v3#) = do
(I# x1#) <- M.readArray vec $! start + 0
let !p1# = (maskw# x1# v# `or#` maskw# x1# v2# `or#` maskw# x1# v3#)
`and#` int2Word# 0x1#
(I# x2#) <- M.readArray vec $! start + 1
let !p2# = p1# `or#`
((maskw# x2# v# `or#` maskw# x2# v2# `or#` maskw# x2# v3#)
`and#` int2Word# 0x2#)
(I# x3#) <- M.readArray vec $! start + 2
let !p3# = p2# `or#`
((maskw# x3# v# `or#` maskw# x3# v2# `or#` maskw# x3# v3#)
`and#` int2Word# 0x4#)
(I# x4#) <- M.readArray vec $! start + 3
let !p4# = p3# `or#`
((maskw# x4# v# `or#` maskw# x4# v2# `or#` maskw# x4# v3#)
`and#` int2Word# 0x8#)
(I# x5#) <- M.readArray vec $! start + 4
let !p5# = p4# `or#`
((maskw# x5# v# `or#` maskw# x5# v2# `or#` maskw# x5# v3#)
`and#` int2Word# 0x10#)
(I# x6#) <- M.readArray vec $! start + 5
let !p6# = p5# `or#`
((maskw# x6# v# `or#` maskw# x6# v2# `or#` maskw# x6# v3#)
`and#` int2Word# 0x20#)
(I# x7#) <- M.readArray vec $! start + 6
let !p7# = p6# `or#`
((maskw# x7# v# `or#` maskw# x7# v2# `or#` maskw# x7# v3#)
`and#` int2Word# 0x40#)
(I# x8#) <- M.readArray vec $! start + 7
let !p8# = p7# `or#`
((maskw# x8# v# `or#` maskw# x8# v2# `or#` maskw# x8# v3#)
`and#` int2Word# 0x80#)
(I# x9#) <- M.readArray vec $! start + 8
let !p9# = p8# `or#`
((maskw# x9# v# `or#` maskw# x9# v2# `or#` maskw# x9# v3#)
`and#` int2Word# 0x100#)
(I# x10#) <- M.readArray vec $! start + 9
let !p10# = p9# `or#`
((maskw# x10# v# `or#` maskw# x10# v2# `or#` maskw# x10# v3#)
`and#` int2Word# 0x200#)
(I# x11#) <- M.readArray vec $! start + 10
let !p11# = p10# `or#`
((maskw# x11# v# `or#` maskw# x11# v2# `or#` maskw# x11# v3#)
`and#` int2Word# 0x400#)
(I# x12#) <- M.readArray vec $! start + 11
let !p12# = p11# `or#`
((maskw# x12# v# `or#` maskw# x12# v2# `or#` maskw# x12# v3#)
`and#` int2Word# 0x800#)
(I# x13#) <- M.readArray vec $! start + 12
let !p13# = p12# `or#`
((maskw# x13# v# `or#` maskw# x13# v2# `or#` maskw# x13# v3#)
`and#` int2Word# 0x1000#)
(I# x14#) <- M.readArray vec $! start + 13
let !p14# = p13# `or#`
((maskw# x14# v# `or#` maskw# x14# v2# `or#` maskw# x14# v3#)
`and#` int2Word# 0x2000#)
(I# x15#) <- M.readArray vec $! start + 14
let !p15# = p14# `or#`
((maskw# x15# v# `or#` maskw# x15# v2# `or#` maskw# x15# v3#)
`and#` int2Word# 0x4000#)
(I# x16#) <- M.readArray vec $! start + 15
let !p16# = p15# `or#`
((maskw# x16# v# `or#` maskw# x16# v2# `or#` maskw# x16# v3#)
`and#` int2Word# 0x8000#)
return $! lineResult# p16# start
{-# INLINE lineSearch32_3 #-}
#endif
Jump to Line
Something went wrong with that request. Please try again.