Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use qsort to sort short ByteString #267

Merged
merged 8 commits into from
Aug 25, 2020
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion Data/ByteString.hs
Original file line number Diff line number Diff line change
Expand Up @@ -1514,7 +1514,12 @@ tails p | null p = [empty]

-- | /O(n)/ Sort a ByteString efficiently, using counting sort (or @qsort@ for inputs of at most 20 bytes).
sort :: ByteString -> ByteString
sort (BS input l) = unsafeCreate l $ \p -> allocaArray 256 $ \arr -> do
sort (BS input l)
-- qsort outperforms counting sort for small arrays
| l <= 20 = unsafeCreate l $ \ptr -> withForeignPtr input $ \inp -> do
memcpy ptr inp (fromIntegral l)
c_sort ptr (fromIntegral l)
| otherwise = unsafeCreate l $ \p -> allocaArray 256 $ \arr -> do

_ <- memset (castPtr arr) 0 (256 * fromIntegral (sizeOf (undefined :: CSize)))
withForeignPtr input (\x -> countOccurrences arr x l)
Expand Down
24 changes: 14 additions & 10 deletions Data/ByteString/Internal.hs
Original file line number Diff line number Diff line change
Expand Up @@ -75,11 +75,12 @@ module Data.ByteString.Internal (
memset, -- :: Ptr Word8 -> Word8 -> CSize -> IO (Ptr Word8)

-- * cbits functions
c_reverse, -- :: Ptr Word8 -> Ptr Word8 -> CInt -> IO ()
c_intersperse, -- :: Ptr Word8 -> Ptr Word8 -> CInt -> Word8 -> IO ()
c_maximum, -- :: Ptr Word8 -> CInt -> IO Word8
c_minimum, -- :: Ptr Word8 -> CInt -> IO Word8
c_count, -- :: Ptr Word8 -> CInt -> Word8 -> IO CInt
c_reverse, -- :: Ptr Word8 -> Ptr Word8 -> CSize -> IO ()
c_intersperse, -- :: Ptr Word8 -> Ptr Word8 -> CSize -> Word8 -> IO ()
c_maximum, -- :: Ptr Word8 -> CSize -> IO Word8
c_minimum, -- :: Ptr Word8 -> CSize -> IO Word8
c_count, -- :: Ptr Word8 -> CSize -> Word8 -> IO CSize
c_sort, -- :: Ptr Word8 -> CSize -> IO ()

-- * Chars
w2c, c2w, isSpaceWord8, isSpaceChar8,
Expand Down Expand Up @@ -745,16 +746,19 @@ memset p w s = c_memset p (fromIntegral w) s
--

foreign import ccall unsafe "static fpstring.h fps_reverse" c_reverse
:: Ptr Word8 -> Ptr Word8 -> CULong -> IO ()
:: Ptr Word8 -> Ptr Word8 -> CSize -> IO ()

foreign import ccall unsafe "static fpstring.h fps_intersperse" c_intersperse
:: Ptr Word8 -> Ptr Word8 -> CULong -> Word8 -> IO ()
:: Ptr Word8 -> Ptr Word8 -> CSize -> Word8 -> IO ()

foreign import ccall unsafe "static fpstring.h fps_maximum" c_maximum
:: Ptr Word8 -> CULong -> IO Word8
:: Ptr Word8 -> CSize -> IO Word8

foreign import ccall unsafe "static fpstring.h fps_minimum" c_minimum
:: Ptr Word8 -> CULong -> IO Word8
:: Ptr Word8 -> CSize -> IO Word8

foreign import ccall unsafe "static fpstring.h fps_count" c_count
:: Ptr Word8 -> CULong -> Word8 -> IO CULong
:: Ptr Word8 -> CSize -> Word8 -> IO CSize

-- | Binding to the C function @fps_sort@ declared in @fpstring.h@.
-- Sorts, in place, the given number of bytes starting at the pointer.
-- The C implementation delegates to @qsort@ with an element size of 1 and
-- a comparator that orders bytes by their unsigned value.
foreign import ccall unsafe "static fpstring.h fps_sort" c_sort
:: Ptr Word8 -> CSize -> IO ()
5 changes: 5 additions & 0 deletions bench/BenchAll.hs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import Gauge
import Prelude hiding (words)

import qualified Data.ByteString as S
import qualified Data.ByteString.Char8 as S8
import qualified Data.ByteString.Lazy as L

import Data.ByteString.Builder
Expand Down Expand Up @@ -225,6 +226,9 @@ sanityCheckInfo =
]
]

-- | Inputs for the @sort@ benchmarks: the prefixes of length 10 through 25
-- of the descending byte sequence 122, 121 .. 32.
sortInputs :: [S.ByteString]
sortInputs = [S.take len descending | len <- [10 .. 25]]
  where
    -- The full descending sequence every benchmark input is cut from.
    descending = S.pack [122, 121 .. 32]

main :: IO ()
main = do
mapM_ putStrLn sanityCheckInfo
Expand Down Expand Up @@ -387,4 +391,5 @@ main = do
, bench "balancedSlow" $ partitionLazy (\x -> hashWord8 x < w 128)
]
]
, bgroup "sort" $ map (\s -> bench (S8.unpack s) $ nf S.sort s) sortInputs
]
18 changes: 13 additions & 5 deletions cbits/fpstring.c
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
#include "fpstring.h"

/* copy a string in reverse */
void fps_reverse(unsigned char *q, unsigned char *p, unsigned long n) {
void fps_reverse(unsigned char *q, unsigned char *p, size_t n) {
p += n-1;
while (n-- != 0)
*q++ = *p--;
Expand All @@ -42,7 +42,7 @@ void fps_reverse(unsigned char *q, unsigned char *p, unsigned long n) {
of the duplicated string */
void fps_intersperse(unsigned char *q,
unsigned char *p,
unsigned long n,
size_t n,
unsigned char c) {

while (n > 1) {
Expand All @@ -55,7 +55,7 @@ void fps_intersperse(unsigned char *q,
}

/* find maximum char in a packed string */
unsigned char fps_maximum(unsigned char *p, unsigned long len) {
unsigned char fps_maximum(unsigned char *p, size_t len) {
unsigned char *q, c = *p;
for (q = p; q < p + len; q++)
if (*q > c)
Expand All @@ -64,7 +64,7 @@ unsigned char fps_maximum(unsigned char *p, unsigned long len) {
}

/* find minimum char in a packed string */
unsigned char fps_minimum(unsigned char *p, unsigned long len) {
unsigned char fps_minimum(unsigned char *p, size_t len) {
unsigned char *q, c = *p;
for (q = p; q < p + len; q++)
if (*q < c)
Expand All @@ -73,7 +73,7 @@ unsigned char fps_minimum(unsigned char *p, unsigned long len) {
}

/* count the number of occurrences of a char in a string */
unsigned long fps_count(unsigned char *p, unsigned long len, unsigned char w) {
size_t fps_count(unsigned char *p, size_t len, unsigned char w) {
unsigned long c;
for (c = 0; len-- != 0; ++p)
if (*p == w)
Expand All @@ -88,3 +88,11 @@ void * fps_memcpy_offsets(void *dst, unsigned long dst_off,
const void *src, unsigned long src_off, size_t n) {
return memcpy(dst + dst_off, src + src_off, n);
}

/* qsort-style comparator for single bytes: reads one unsigned char through
   each argument and returns their difference, which is negative, zero or
   positive when the first byte is less than, equal to or greater than the
   second. */
int fps_compare(const void *a, const void *b) {
  const unsigned char *lhs = (const unsigned char *)a;
  const unsigned char *rhs = (const unsigned char *)b;
  return (int)*lhs - (int)*rhs;
}

/* Sort the len bytes starting at p in place, ordering them by unsigned
   value via the C library's qsort (element size 1) with fps_compare as the
   comparator. */
void fps_sort(unsigned char *p, size_t len) {
  /* qsort returns void; a `return <expr>;` in a void function is rejected
     by ISO C, so call it as a plain statement. */
  qsort(p, len, 1, fps_compare);
}
14 changes: 7 additions & 7 deletions include/fpstring.h
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@

#include <string.h>
#include <stdlib.h>

void fps_reverse(unsigned char *dest, unsigned char *from, unsigned long len);
void fps_intersperse(unsigned char *dest, unsigned char *from, unsigned long len, unsigned char c);
unsigned char fps_maximum(unsigned char *p, unsigned long len);
unsigned char fps_minimum(unsigned char *p, unsigned long len);
unsigned long fps_count(unsigned char *p, unsigned long len, unsigned char w);

void fps_reverse(unsigned char *dest, unsigned char *from, size_t len);
void fps_intersperse(unsigned char *dest, unsigned char *from, size_t len, unsigned char c);
unsigned char fps_maximum(unsigned char *p, size_t len);
unsigned char fps_minimum(unsigned char *p, size_t len);
size_t fps_count(unsigned char *p, size_t len, unsigned char w);
void fps_sort(unsigned char *p, size_t len);