diff --git a/core/docs/Changelog.md b/core/docs/Changelog.md index bd8386003a..2c29bee104 100644 --- a/core/docs/Changelog.md +++ b/core/docs/Changelog.md @@ -6,6 +6,12 @@ - Streamly.Data.Stream.isInfixOf - Streamly.Data.Array.writeLastN +* `Streamly.FileSystem.Dir` module is deprecated and replaced by + `Streamly.FileSystem.DirIO` module. The new module has exact same + APIs except that it uses the streamly native `Path` type instead + of `FilePath` for path representation. The new implementation is + significantly faster. + ### Internal API Changes * Remove the `Storable` constraint from several functions involving the ring diff --git a/core/src/DocTestDataStream.hs b/core/src/DocTestDataStream.hs index 727f98a15d..56f9843113 100644 --- a/core/src/DocTestDataStream.hs +++ b/core/src/DocTestDataStream.hs @@ -26,7 +26,7 @@ >>> import qualified Streamly.Data.StreamK as StreamK >>> import qualified Streamly.Data.Unfold as Unfold >>> import qualified Streamly.Data.Parser as Parser ->>> import qualified Streamly.FileSystem.Dir as Dir +>>> import qualified Streamly.FileSystem.DirIO as Dir For APIs that have not been released yet. @@ -35,5 +35,5 @@ For APIs that have not been released yet. >>> import qualified Streamly.Internal.Data.Stream as Stream >>> import qualified Streamly.Internal.Data.StreamK as StreamK >>> import qualified Streamly.Internal.Data.Unfold as Unfold ->>> import qualified Streamly.Internal.FileSystem.Dir as Dir +>>> import qualified Streamly.Internal.FileSystem.DirIO as Dir -} diff --git a/core/src/DocTestDataStreamK.hs b/core/src/DocTestDataStreamK.hs index a8dd848d3e..b5a80df643 100644 --- a/core/src/DocTestDataStreamK.hs +++ b/core/src/DocTestDataStreamK.hs @@ -12,10 +12,10 @@ >>> import qualified Streamly.Data.Parser as Parser >>> import qualified Streamly.Data.Stream as Stream >>> import qualified Streamly.Data.StreamK as StreamK ->>> import qualified Streamly.FileSystem.Dir as Dir +>>> import qualified Streamly.FileSystem.DirIO as Dir For APIs that have not been released yet. >>> import qualified Streamly.Internal.Data.StreamK as StreamK ->>> import qualified Streamly.Internal.FileSystem.Dir as Dir +>>> import qualified Streamly.Internal.FileSystem.DirIO as Dir -} diff --git a/core/src/Streamly/FileSystem/Dir.hs b/core/src/Streamly/FileSystem/Dir.hs index 7e493cd2cb..b9afd08896 100644 --- a/core/src/Streamly/FileSystem/Dir.hs +++ b/core/src/Streamly/FileSystem/Dir.hs @@ -1,3 +1,4 @@ +{-# OPTIONS_GHC -Wno-deprecations #-} -- | -- Module : Streamly.FileSystem.Dir -- Copyright : (c) 2018 Composewell Technologies @@ -12,6 +13,7 @@ -- something else. module Streamly.FileSystem.Dir +{-# DEPRECATED "Please use \"Streamly.FileSystem.DirIO\" instead." #-} ( -- * Streams read diff --git a/core/src/Streamly/FileSystem/DirIO.hs b/core/src/Streamly/FileSystem/DirIO.hs new file mode 100644 index 0000000000..224165fc7a --- /dev/null +++ b/core/src/Streamly/FileSystem/DirIO.hs @@ -0,0 +1,24 @@ +-- | +-- Module : Streamly.FileSystem.DirIO +-- Copyright : (c) 2018 Composewell Technologies +-- +-- License : BSD3 +-- Maintainer : streamly@composewell.com +-- Stability : pre-release +-- Portability : GHC +-- +-- High performance and streaming APIs for reading directories. +-- +-- >>> import qualified Streamly.FileSystem.DirIO as Dir +-- + +module Streamly.FileSystem.DirIO + ( + -- * Streams + read + , readEither + ) +where + +import Streamly.Internal.FileSystem.DirIO +import Prelude hiding (read) diff --git a/core/src/Streamly/Internal/FileSystem/Dir.hs b/core/src/Streamly/Internal/FileSystem/Dir.hs index f8588467f3..5080ee0d55 100644 --- a/core/src/Streamly/Internal/FileSystem/Dir.hs +++ b/core/src/Streamly/Internal/FileSystem/Dir.hs @@ -9,6 +9,7 @@ -- Portability : GHC module Streamly.Internal.FileSystem.Dir +{-# DEPRECATED "Please use \"Streamly.Internal.FileSystem.DirIO\" instead." #-} ( -- * Streams read diff --git a/core/src/Streamly/Internal/FileSystem/DirIO.hs b/core/src/Streamly/Internal/FileSystem/DirIO.hs new file mode 100644 index 0000000000..f91ffb1be9 --- /dev/null +++ b/core/src/Streamly/Internal/FileSystem/DirIO.hs @@ -0,0 +1,510 @@ +#include "inline.hs" + +-- | +-- Module : Streamly.Internal.FileSystem.DirIO +-- Copyright : (c) 2018 Composewell Technologies +-- +-- License : BSD3 +-- Maintainer : streamly@composewell.com +-- Portability : GHC + +module Streamly.Internal.FileSystem.DirIO + ( + -- * Streams + read + + -- read not just the names but also the inode attrs of the children. This + -- abstraction makes sense because when we read the dir contents we also + -- get the inodes, and it is cheaper to get the attrs from the inodes + -- instead of resolving the paths and get those. This abstraction may be + -- less portable as different platforms may have different attrs. To + -- optimize, we can also add a filter/pattern/parser on the names of the + -- children that we want to read. We can call that readAttrsWith? Or just + -- have the default readAttrs do that? Usually we won't need that, so it + -- may be better to keep that a separate API. + -- , readAttrs + + -- recursive read requires us to read the attributes of the children to + -- determine if something is a directory or not. Therefore, it may be a good + -- idea to have a low level routine that also spits out the attributes of + -- the files, we get that for free. We can also add a filter/pattern/parser + -- on the names of the children that we want to read. + --, readAttrsRecursive -- Options: acyclic, follow symlinks + , readFiles + , readDirs + , readEither + , readEitherPaths + , readEitherChunks + + -- We can implement this in terms of readAttrsRecursive without losing + -- perf. + -- , readEitherRecursive -- Options: acyclic, follow symlinks + -- , readAncestors -- read the parent chain using the .. entry. + -- , readAncestorsAttrs + + -- * Unfolds + -- | Use the more convenient stream APIs instead of unfolds where possible. + , reader + , fileReader + , dirReader + , eitherReader + , eitherReaderPaths + + {- + , toStreamWithBufferOf + + , readChunks + , readChunksWithBufferOf + + , toChunksWithBufferOf + , toChunks + + , write + , writeWithBufferOf + + -- Byte stream write (Streams) + , fromStream + , fromStreamWithBufferOf + + -- -- * Array Write + , writeArray + , writeChunks + , writeChunksWithBufferOf + + -- -- * Array stream Write + , fromChunks + , fromChunksWithBufferOf + -} + ) +where + +import Control.Monad.Catch (MonadCatch) +import Control.Monad.IO.Class (MonadIO(..)) +import Data.Bifunctor (bimap) +import Data.Either (isRight, isLeft, fromLeft, fromRight) +import Streamly.Data.Stream (Stream) +import Streamly.Internal.Data.Unfold (Step(..)) +import Streamly.Internal.Data.Unfold.Type (Unfold(..)) +import Streamly.Internal.FileSystem.Path (Path) +#if defined(mingw32_HOST_OS) || defined(__MINGW32__) +import qualified Streamly.Internal.Data.Fold as Fold +import Streamly.Internal.FileSystem.Windows.ReadDir + (DirStream, openDirStream, closeDirStream, readDirStreamEither) +#else +import Streamly.Internal.FileSystem.Posix.ReadDir + ( DirStream, openDirStream, closeDirStream, readDirStreamEither + , readEitherChunks) +#endif +import qualified Streamly.Internal.Data.Stream as S +import qualified Streamly.Data.Unfold as UF +import qualified Streamly.Internal.Data.Unfold as UF (mapM2, bracketIO) +import qualified Streamly.Internal.FileSystem.Path as Path + +import Prelude hiding (read) + +{- +{-# INLINABLE readArrayUpto #-} +readArrayUpto :: Int -> Handle -> IO (Array Word8) +readArrayUpto size h = do + ptr <- mallocPlainForeignPtrBytes size + -- ptr <- mallocPlainForeignPtrAlignedBytes size (alignment (undefined :: Word8)) + withForeignPtr ptr $ \p -> do + n <- hGetBufSome h p size + let v = Array + { aStart = ptr + , arrEnd = p `plusPtr` n + , arrBound = p `plusPtr` size + } + -- XXX shrink only if the diff is significant + shrinkToFit v + +------------------------------------------------------------------------------- +-- Stream of Arrays IO +------------------------------------------------------------------------------- + +-- | @toChunksWithBufferOf size h@ reads a stream of arrays from file handle @h@. +-- The maximum size of a single array is specified by @size@. The actual size +-- read may be less than or equal to @size@. +{-# INLINE _toChunksWithBufferOf #-} +_toChunksWithBufferOf :: MonadIO m => Int -> Handle -> Stream m (Array Word8) +_toChunksWithBufferOf size h = go + where + -- XXX use cons/nil instead + go = mkStream $ \_ yld _ stp -> do + arr <- liftIO $ readArrayUpto size h + if A.length arr == 0 + then stp + else yld arr go + +-- | @toChunksWithBufferOf size handle@ reads a stream of arrays from the file +-- handle @handle@. The maximum size of a single array is limited to @size@. +-- The actual size read may be less than or equal to @size@. +-- +-- @since 0.7.0 +{-# INLINE_NORMAL toChunksWithBufferOf #-} +toChunksWithBufferOf :: MonadIO m => Int -> Handle -> Stream m (Array Word8) +toChunksWithBufferOf size h = D.fromStreamD (D.Stream step ()) + where + {-# INLINE_LATE step #-} + step _ _ = do + arr <- liftIO $ readArrayUpto size h + return $ + case A.length arr of + 0 -> D.Stop + _ -> D.Yield arr () + +-- | Unfold the tuple @(bufsize, handle)@ into a stream of 'Word8' arrays. +-- Read requests to the IO device are performed using a buffer of size +-- @bufsize@. The size of an array in the resulting stream is always less than +-- or equal to @bufsize@. +-- +-- @since 0.7.0 +{-# INLINE_NORMAL readChunksWithBufferOf #-} +readChunksWithBufferOf :: MonadIO m => Unfold m (Int, Handle) (Array Word8) +readChunksWithBufferOf = Unfold step return + where + {-# INLINE_LATE step #-} + step (size, h) = do + arr <- liftIO $ readArrayUpto size h + return $ + case A.length arr of + 0 -> D.Stop + _ -> D.Yield arr (size, h) + +-- XXX read 'Array a' instead of Word8 + +-- | @toChunks handle@ reads a stream of arrays from the specified file +-- handle. The maximum size of a single array is limited to +-- @defaultChunkSize@. The actual size read may be less than or equal to +-- @defaultChunkSize@. +-- +-- > toChunks = toChunksWithBufferOf defaultChunkSize +-- +-- @since 0.7.0 +{-# INLINE toChunks #-} +toChunks :: MonadIO m => Handle -> Stream m (Array Word8) +toChunks = toChunksWithBufferOf defaultChunkSize + +-- | Unfolds a handle into a stream of 'Word8' arrays. Requests to the IO +-- device are performed using a buffer of size +-- 'Streamly.Internal.Data.Array.Type.defaultChunkSize'. The +-- size of arrays in the resulting stream are therefore less than or equal to +-- 'Streamly.Internal.Data.Array.Type.defaultChunkSize'. +-- +-- @since 0.7.0 +{-# INLINE readChunks #-} +readChunks :: MonadIO m => Unfold m Handle (Array Word8) +readChunks = UF.first readChunksWithBufferOf defaultChunkSize + +------------------------------------------------------------------------------- +-- Read a Directory to Stream +------------------------------------------------------------------------------- + +-- TODO for concurrent streams implement readahead IO. We can send multiple +-- read requests at the same time. For serial case we can use async IO. We can +-- also control the read throughput in mbps or IOPS. + +-- | Unfolds the tuple @(bufsize, handle)@ into a byte stream, read requests +-- to the IO device are performed using buffers of @bufsize@. +-- +-- @since 0.7.0 +{-# INLINE readWithBufferOf #-} +readWithBufferOf :: MonadIO m => Unfold m (Int, Handle) Word8 +readWithBufferOf = UF.many readChunksWithBufferOf A.read + +-- | @toStreamWithBufferOf bufsize handle@ reads a byte stream from a file +-- handle, reads are performed in chunks of up to @bufsize@. +-- +-- /Pre-release/ +{-# INLINE toStreamWithBufferOf #-} +toStreamWithBufferOf :: MonadIO m => Int -> Handle -> Stream m Word8 +toStreamWithBufferOf chunkSize h = AS.concat $ toChunksWithBufferOf chunkSize h +-} + +-- read child node names from a dir filtering out . and .. +-- +-- . and .. are an implementation artifact, and should probably not be used in +-- user level abstractions. +-- +-- . does not seem to have any useful purpose. If we have the path of the dir +-- then we will resolve it to get the inode of the dir so the . entry would be +-- redundant. If we have the inode of the dir to read the dir then it is +-- redundant. Is this for cross check when doing fsck? +-- +-- For .. we have the readAncestors API, we should not have this in the +-- readChildren API. + +-- XXX exception handling + +{-# INLINE streamEitherReader #-} +streamEitherReader :: MonadIO m => + Unfold m DirStream (Either Path Path) +streamEitherReader = Unfold step return + where + + step strm = do + r <- liftIO $ readDirStreamEither strm + case r of + Nothing -> return Stop + Just x -> return $ Yield x strm + +{-# INLINE streamReader #-} +streamReader :: MonadIO m => Unfold m DirStream Path +streamReader = fmap (either id id) streamEitherReader + +-- | Read a directory emitting a stream with names of the children. Filter out +-- "." and ".." entries. +-- +-- /Internal/ + +{-# INLINE reader #-} +reader :: (MonadIO m, MonadCatch m) => Unfold m Path Path +reader = +-- XXX Instead of using bracketIO for each iteration of the loop we should +-- instead yield a buffer of dir entries in each iteration and then use an +-- unfold and concat to flatten those entries. That should improve the +-- performance. + UF.bracketIO openDirStream closeDirStream streamReader + +-- XXX We can use a more general mechanism to filter the contents of a +-- directory. We can just stat each child and pass on the stat information. We +-- can then use that info to do a general filtering. "find" like filters can be +-- created. + +-- | Read directories as Left and files as Right. Filter out "." and ".." +-- entries. +-- +-- /Internal/ +-- +{-# INLINE eitherReader #-} +eitherReader :: (MonadIO m, MonadCatch m) => + Unfold m Path (Either Path Path) +eitherReader = + -- XXX The measured overhead of bracketIO is not noticeable, if it turns + -- out to be a problem for small filenames we can use getdents64 to use + -- chunked read to avoid the overhead. + UF.bracketIO openDirStream closeDirStream streamEitherReader + +{-# INLINE eitherReaderPaths #-} +eitherReaderPaths ::(MonadIO m, MonadCatch m) => + Unfold m Path (Either Path Path) +eitherReaderPaths = + let () = Path.append + in UF.mapM2 (\dir -> return . bimap (dir ) (dir )) eitherReader + +-- +-- | Read files only. +-- +-- /Internal/ +-- +{-# INLINE fileReader #-} +fileReader :: (MonadIO m, MonadCatch m) => Unfold m Path Path +fileReader = fmap (fromRight undefined) $ UF.filter isRight eitherReader + +-- | Read directories only. Filter out "." and ".." entries. +-- +-- /Internal/ +-- +{-# INLINE dirReader #-} +dirReader :: (MonadIO m, MonadCatch m) => Unfold m Path Path +dirReader = fmap (fromLeft undefined) $ UF.filter isLeft eitherReader + +-- | Raw read of a directory. +-- +-- /Pre-release/ +{-# INLINE read #-} +read :: (MonadIO m, MonadCatch m) => Path -> Stream m Path +read = S.unfold reader + +-- | Read directories as Left and files as Right. Filter out "." and ".." +-- entries. The output contains the names of the directories and files. +-- +-- /Pre-release/ +{-# INLINE readEither #-} +readEither :: (MonadIO m, MonadCatch m) => Path -> Stream m (Either Path Path) +readEither = S.unfold eitherReader + +-- | Like 'readEither' but prefix the names of the files and directories with +-- the supplied directory path. +{-# INLINE readEitherPaths #-} +readEitherPaths :: (MonadIO m, MonadCatch m) => + Path -> Stream m (Either Path Path) +readEitherPaths dir = + let () = Path.append + in fmap (bimap (dir ) (dir )) $ readEither dir + +#if defined(mingw32_HOST_OS) || defined(__MINGW32__) +-- XXX Implement a custom version of readEitherChunks (like for Posix) for +-- windows as well. Also implement readEitherByteChunks. +-- +-- XXX For a fast custom implementation of traversal, the Right could be the +-- final array chunk including all files and dirs to be written to IO. The Left +-- could be list of dirs to be traversed. +-- +-- This is a generic (but slower?) version of readEitherChunks using +-- eitherReaderPaths. +{-# INLINE readEitherChunks #-} +readEitherChunks :: (MonadIO m, MonadCatch m) => + [Path] -> Stream m (Either [Path] [Path]) +readEitherChunks dirs = + -- XXX Need to use a take to limit the group size. There will be separate + -- limits for dir and files groups. + S.groupsWhile grouper collector + $ S.unfoldMany eitherReaderPaths + $ S.fromList dirs + + where + + -- XXX We can use a refold "Either dirs files" and yield the one that fills + -- and pass the remainder to the next Refold. + grouper first next = + case first of + Left _ -> isLeft next + Right _ -> isRight next + + collector = Fold.foldl' step (Right []) + + step b x = + case x of + Left x1 -> + case b of + Right _ -> Left [x1] -- initial + _ -> either (\xs -> Left (x1:xs)) Right b + Right x1 -> fmap (x1:) b +#endif + +-- | Read files only. +-- +-- /Internal/ +-- +{-# INLINE readFiles #-} +readFiles :: (MonadIO m, MonadCatch m) => Path -> Stream m Path +readFiles = S.unfold fileReader + +-- | Read directories only. +-- +-- /Internal/ +-- +{-# INLINE readDirs #-} +readDirs :: (MonadIO m, MonadCatch m) => Path -> Stream m Path +readDirs = S.unfold dirReader + +{- +------------------------------------------------------------------------------- +-- Writing +------------------------------------------------------------------------------- + +------------------------------------------------------------------------------- +-- Array IO (output) +------------------------------------------------------------------------------- + +-- | Write an 'Array' to a file handle. +-- +-- @since 0.7.0 +{-# INLINABLE writeArray #-} +writeArray :: Storable a => Handle -> Array a -> IO () +writeArray _ arr | A.length arr == 0 = return () +writeArray h Array{..} = withForeignPtr aStart $ \p -> hPutBuf h p aLen + where + aLen = + let p = unsafeForeignPtrToPtr aStart + in arrEnd `minusPtr` p + +------------------------------------------------------------------------------- +-- Stream of Arrays IO +------------------------------------------------------------------------------- + +------------------------------------------------------------------------------- +-- Writing +------------------------------------------------------------------------------- + +-- | Write a stream of arrays to a handle. +-- +-- @since 0.7.0 +{-# INLINE fromChunks #-} +fromChunks :: (MonadIO m, Storable a) + => Handle -> Stream m (Array a) -> m () +fromChunks h m = S.mapM_ (liftIO . writeArray h) m + +-- | @fromChunksWithBufferOf bufsize handle stream@ writes a stream of arrays +-- to @handle@ after coalescing the adjacent arrays in chunks of @bufsize@. +-- The chunk size is only a maximum and the actual writes could be smaller as +-- we do not split the arrays to fit exactly to the specified size. +-- +-- @since 0.7.0 +{-# INLINE fromChunksWithBufferOf #-} +fromChunksWithBufferOf :: (MonadIO m, Storable a) + => Int -> Handle -> Stream m (Array a) -> m () +fromChunksWithBufferOf n h xs = fromChunks h $ AS.compact n xs + +-- | @fromStreamWithBufferOf bufsize handle stream@ writes @stream@ to @handle@ +-- in chunks of @bufsize@. A write is performed to the IO device as soon as we +-- collect the required input size. +-- +-- @since 0.7.0 +{-# INLINE fromStreamWithBufferOf #-} +fromStreamWithBufferOf :: MonadIO m => Int -> Handle -> Stream m Word8 -> m () +fromStreamWithBufferOf n h m = fromChunks h $ S.pinnedChunksOf n m +-- fromStreamWithBufferOf n h m = fromChunks h $ AS.chunksOf n m + +-- > write = 'writeWithBufferOf' A.defaultChunkSize +-- +-- | Write a byte stream to a file handle. Accumulates the input in chunks of +-- up to 'Streamly.Internal.Data.Array.Type.defaultChunkSize' before writing. +-- +-- NOTE: This may perform better than the 'write' fold, you can try this if you +-- need some extra perf boost. +-- +-- @since 0.7.0 +{-# INLINE fromStream #-} +fromStream :: MonadIO m => Handle -> Stream m Word8 -> m () +fromStream = fromStreamWithBufferOf defaultChunkSize + +-- | Write a stream of arrays to a handle. Each array in the stream is written +-- to the device as a separate IO request. +-- +-- @since 0.7.0 +{-# INLINE writeChunks #-} +writeChunks :: (MonadIO m, Storable a) => Handle -> Fold m (Array a) () +writeChunks h = FL.drainBy (liftIO . writeArray h) + +-- | @writeChunksWithBufferOf bufsize handle@ writes a stream of arrays +-- to @handle@ after coalescing the adjacent arrays in chunks of @bufsize@. +-- We never split an array, if a single array is bigger than the specified size +-- it emitted as it is. Multiple arrays are coalesed as long as the total size +-- remains below the specified size. +-- +-- @since 0.7.0 +{-# INLINE writeChunksWithBufferOf #-} +writeChunksWithBufferOf :: (MonadIO m, Storable a) + => Int -> Handle -> Fold m (Array a) () +writeChunksWithBufferOf n h = lpackArraysChunksOf n (writeChunks h) + +-- GHC buffer size dEFAULT_FD_BUFFER_SIZE=8192 bytes. +-- +-- XXX test this +-- Note that if you use a chunk size less than 8K (GHC's default buffer +-- size) then you are advised to use 'NOBuffering' mode on the 'Handle' in case you +-- do not want buffering to occur at GHC level as well. Same thing applies to +-- writes as well. + +-- | @writeWithBufferOf reqSize handle@ writes the input stream to @handle@. +-- Bytes in the input stream are collected into a buffer until we have a chunk +-- of @reqSize@ and then written to the IO device. +-- +-- @since 0.7.0 +{-# INLINE writeWithBufferOf #-} +writeWithBufferOf :: MonadIO m => Int -> Handle -> Fold m Word8 () +writeWithBufferOf n h = FL.groupsOf n (pinnedWriteNUnsafe n) (writeChunks h) + +-- > write = 'writeWithBufferOf' A.defaultChunkSize +-- +-- | Write a byte stream to a file handle. Accumulates the input in chunks of +-- up to 'Streamly.Internal.Data.Array.Type.defaultChunkSize' before writing +-- to the IO device. +-- +-- @since 0.7.0 +{-# INLINE write #-} +write :: MonadIO m => Handle -> Fold m Word8 () +write = writeWithBufferOf defaultChunkSize +-} diff --git a/core/src/Streamly/Internal/FileSystem/Posix/ReadDir.hsc b/core/src/Streamly/Internal/FileSystem/Posix/ReadDir.hsc new file mode 100644 index 0000000000..b943a11393 --- /dev/null +++ b/core/src/Streamly/Internal/FileSystem/Posix/ReadDir.hsc @@ -0,0 +1,445 @@ +-- | +-- Module : Streamly.Internal.FileSystem.Posix.ReadDir +-- Copyright : (c) 2024 Composewell Technologies +-- +-- License : BSD3 +-- Maintainer : streamly@composewell.com +-- Portability : GHC + +module Streamly.Internal.FileSystem.Posix.ReadDir + ( +#if !defined(mingw32_HOST_OS) && !defined(__MINGW32__) + DirStream + , openDirStream + , closeDirStream + , readDirStreamEither + , readEitherChunks + , readEitherByteChunks +#endif + ) +where + +#if !defined(mingw32_HOST_OS) && !defined(__MINGW32__) +import Control.Monad.IO.Class (MonadIO(..)) +import Data.Char (ord) +import Foreign (Ptr, Word8, nullPtr, peek, peekByteOff, castPtr, plusPtr) +import Foreign.C + (resetErrno, Errno(..), getErrno, eINTR, throwErrno + , throwErrnoIfMinus1Retry_, CInt(..), CString, CChar, CSize(..)) +import Foreign.C.Error (errnoToIOError) +import Foreign.Storable (poke) +import Fusion.Plugin.Types (Fuse(..)) +import Streamly.Internal.Data.Array (Array(..)) +import Streamly.Internal.Data.MutByteArray (MutByteArray) +import Streamly.Internal.FileSystem.PosixPath (PosixPath(..)) +import Streamly.Internal.Data.Stream (Stream(..), Step(..)) + +import qualified Streamly.Internal.Data.Array as Array +import qualified Streamly.Internal.Data.MutByteArray as MutByteArray +import qualified Streamly.Internal.FileSystem.PosixPath as Path + +#include + +------------------------------------------------------------------------------- +-- From unix +------------------------------------------------------------------------------- + +-- | as 'throwErrno', but exceptions include the given path when appropriate. +-- +throwErrnoPath :: String -> PosixPath -> IO a +throwErrnoPath loc path = + do + errno <- getErrno + -- XXX toString uses strict decoding, may fail + ioError (errnoToIOError loc errno Nothing (Just (Path.toString path))) + +throwErrnoPathIfRetry :: (a -> Bool) -> String -> PosixPath -> IO a -> IO a +throwErrnoPathIfRetry pr loc rpath f = + do + res <- f + if pr res + then do + err <- getErrno + if err == eINTR + then throwErrnoPathIfRetry pr loc rpath f + else throwErrnoPath loc rpath + else return res + +throwErrnoPathIfNullRetry :: String -> PosixPath -> IO (Ptr a) -> IO (Ptr a) +throwErrnoPathIfNullRetry loc path f = + throwErrnoPathIfRetry (== nullPtr) loc path f + +------------------------------------------------------------------------------- +-- import System.Posix.Directory (closeDirStream) +-- import System.Posix.Directory.Internals (DirStream(..), CDir, CDirent) +-- requires unix >= 2.8 +------------------------------------------------------------------------------- + +data {-# CTYPE "DIR" #-} CDir +data {-# CTYPE "struct dirent" #-} CDirent + +newtype DirStream = DirStream (Ptr CDir) + +------------------------------------------------------------------------------- + +foreign import ccall unsafe "closedir" + c_closedir :: Ptr CDir -> IO CInt + +foreign import capi unsafe "dirent.h opendir" + c_opendir :: CString -> IO (Ptr CDir) + +-- XXX The "unix" package uses a wrapper over readdir __hscore_readdir (see +-- cbits/HsUnix.c in unix package) which uses readdir_r in some cases where +-- readdir is not known to be re-entrant. We are not doing that here. We are +-- assuming that readdir is re-entrant which may not be the case on some old +-- unix systems. +foreign import ccall unsafe "dirent.h readdir" + c_readdir :: Ptr CDir -> IO (Ptr CDirent) + +-- XXX Use openat instead of open so that we do not have to build and resolve +-- absolute paths. +-- +-- XXX Path is not null terminated therefore we need to make a copy even if the +-- array is pinned. +-- {-# INLINE openDirStream #-} +openDirStream :: PosixPath -> IO DirStream +openDirStream p = + Array.asCStringUnsafe (Path.toChunk p) $ \s -> do + dirp <- throwErrnoPathIfNullRetry "openDirStream" p $ c_opendir s + return (DirStream dirp) + +-- | @closeDirStream dp@ calls @closedir@ to close +-- the directory stream @dp@. +closeDirStream :: DirStream -> IO () +closeDirStream (DirStream dirp) = do + throwErrnoIfMinus1Retry_ "closeDirStream" (c_closedir dirp) + +isMetaDir :: Ptr CChar -> IO Bool +isMetaDir dname = do + -- XXX Assuming an encoding that maps "." to ".", this is true for + -- UTF8. + c1 <- peek dname + if (c1 /= fromIntegral (ord '.')) + then return False + else do + c2 :: Word8 <- peekByteOff dname 1 + if (c2 == 0) + then return True + else if (c2 /= fromIntegral (ord '.')) + then return False + else do + c3 :: Word8 <- peekByteOff dname 2 + if (c3 == 0) + then return True + else return False + +-- XXX We can use getdents64 directly so that we can use array slices from the +-- same buffer that we passed to the OS. That way we can also avoid any +-- overhead of bracket. +-- XXX Make this as Unfold to avoid returning Maybe +-- XXX Or NOINLINE some parts and inline the rest to fuse it +-- {-# INLINE readDirStreamEither #-} +readDirStreamEither :: + -- DirStream -> IO (Either (Rel (Dir Path)) (Rel (File Path))) + DirStream -> IO (Maybe (Either PosixPath PosixPath)) +readDirStreamEither (DirStream dirp) = loop + + where + + -- mkPath :: IsPath (Rel (a Path)) => Array Word8 -> Rel (a Path) + -- {-# INLINE mkPath #-} + mkPath :: Array Word8 -> PosixPath + mkPath = Path.unsafeFromChunk + + loop = do + resetErrno + ptr <- c_readdir dirp + if (ptr /= nullPtr) + then do + let dname = #{ptr struct dirent, d_name} ptr + dtype :: #{type unsigned char} <- #{peek struct dirent, d_type} ptr + -- dreclen :: #{type unsigned short} <- #{peek struct dirent, d_reclen} ptr + -- It is possible to find the name length using dreclen and then use + -- fromPtrN, but it is not straightforward because the reclen is + -- padded to 8-byte boundary. + name <- Array.fromCString (castPtr dname) + if (dtype == #const DT_DIR) + then do + isMeta <- isMetaDir dname + if isMeta + then loop + else return (Just (Left (mkPath name))) + else return (Just (Right (mkPath name))) + else do + errno <- getErrno + if (errno == eINTR) + then loop + else do + let (Errno n) = errno + if (n == 0) + -- then return (Left (mkPath (Array.fromList [46]))) + then return Nothing + else throwErrno "readDirStreamEither" + +{-# ANN type ChunkStreamState Fuse #-} +data ChunkStreamState = + ChunkStreamInit [PosixPath] [PosixPath] Int [PosixPath] Int + | ChunkStreamLoop + PosixPath -- current dir path + [PosixPath] -- remaining dirs + (Ptr CDir) -- current dir + [PosixPath] -- dirs buffered + Int -- dir count + [PosixPath] -- files buffered + Int -- file count + +-- XXX We can use a fold for collecting files and dirs. +-- XXX We can write a two fold scan to buffer and yield whichever fills first +-- like foldMany, it would be foldEither. +{-# INLINE readEitherChunks #-} +readEitherChunks :: MonadIO m => [PosixPath] -> Stream m (Either [PosixPath] [PosixPath]) +readEitherChunks alldirs = + Stream step (ChunkStreamInit alldirs [] 0 [] 0) + + where + + -- We want to keep the dir batching as low as possible for better + -- concurrency esp when the number of dirs is low. + dirMax = 4 + fileMax = 1000 + + mkPath :: Array Word8 -> PosixPath + mkPath = Path.unsafeFromChunk + + step _ (ChunkStreamInit (x:xs) dirs ndirs files nfiles) = do + DirStream dirp <- liftIO $ openDirStream x + return $ Skip (ChunkStreamLoop x xs dirp dirs ndirs files nfiles) + + step _ (ChunkStreamInit [] [] _ [] _) = + return Stop + + step _ (ChunkStreamInit [] [] _ files _) = + return $ Yield (Right files) (ChunkStreamInit [] [] 0 [] 0) + + step _ (ChunkStreamInit [] dirs _ files _) = + return $ Yield (Left dirs) (ChunkStreamInit [] [] 0 files 0) + + step _ st@(ChunkStreamLoop curdir xs dirp dirs ndirs files nfiles) = do + liftIO resetErrno + dentPtr <- liftIO $ c_readdir dirp + if (dentPtr /= nullPtr) + then do + let dname = #{ptr struct dirent, d_name} dentPtr + dtype :: #{type unsigned char} <- + liftIO $ #{peek struct dirent, d_type} dentPtr + + name <- Array.fromCString (castPtr dname) + let path = Path.append curdir (mkPath name) + + if (dtype == (#const DT_DIR)) + then do + isMeta <- liftIO $ isMetaDir dname + if isMeta + then return $ Skip st + else let dirs1 = path : dirs + ndirs1 = ndirs + 1 + in if ndirs1 >= dirMax + then return $ Yield (Left dirs1) + (ChunkStreamLoop curdir xs dirp [] 0 files nfiles) + else return $ Skip + (ChunkStreamLoop curdir xs dirp dirs1 ndirs1 files nfiles) + else let files1 = path : files + nfiles1 = nfiles + 1 + in if nfiles1 >= fileMax + then return $ Yield (Right files1) + (ChunkStreamLoop curdir xs dirp dirs ndirs [] 0) + else return $ Skip + (ChunkStreamLoop curdir xs dirp dirs ndirs files1 nfiles1) + else do + errno <- liftIO getErrno + if (errno == eINTR) + then return $ Skip st + else do + let (Errno n) = errno + liftIO $ closeDirStream (DirStream dirp) + if (n == 0) + then return $ Skip (ChunkStreamInit xs dirs ndirs files nfiles) + else liftIO $ throwErrno "readEitherChunks" + +foreign import ccall unsafe "string.h memcpy" c_memcpy + :: Ptr Word8 -> Ptr Word8 -> CSize -> IO (Ptr Word8) + +foreign import ccall unsafe "string.h strlen" c_strlen + :: Ptr CChar -> IO CSize + +{-# ANN type ChunkStreamByteState Fuse #-} +data ChunkStreamByteState = + ChunkStreamByteInit0 + | ChunkStreamByteInit [PosixPath] [PosixPath] Int MutByteArray Int + | ChunkStreamByteLoop + PosixPath -- current dir path + [PosixPath] -- remaining dirs + (Ptr CDir) -- current dir + [PosixPath] -- dirs buffered + Int -- dir count + MutByteArray + Int + | ChunkStreamByteLoopPending + (Ptr CChar) -- pending item + PosixPath -- current dir path + [PosixPath] -- remaining dirs + (Ptr CDir) -- current dir + MutByteArray + Int + +-- XXX Add follow-symlinks option. +-- XXX Detect cycles. + +-- XXX We can also emit both files and directories together this will be +-- especially useful when we are emitting chunks. +-- +-- Since we are separating paths by newlines, it cannot support newlines in +-- paths. Or we can return null separated paths as well. Provide a Mut array +-- API to replace the nulls with newlines in-place. +-- +-- We can pass a fold to make this modular, but if we are passing readdir +-- managed memory then we will have to consume it immediately. Otherwise we can +-- use getdents64 directly and use GHC managed memory instead. + +-- | Left is directories. Right is a buffer containing directories and files +-- separated by newlines. +{-# INLINE readEitherByteChunks #-} +readEitherByteChunks :: MonadIO m => + [PosixPath] -> Stream m (Either [PosixPath] (Array Word8)) +readEitherByteChunks alldirs = + Stream step (ChunkStreamByteInit0) + + where + + -- XXX A single worker may not have enough directories to list at once to + -- fill up a large buffer. We need to change the concurrency model such + -- that a worker should be able to pick up another dir from the queue + -- without emitting an output until the buffer fills. + -- + -- XXX A worker can also pick up multiple work items in one go. However, we + -- also need to keep in mind that any kind of batching might have + -- pathological cases where concurrency may be reduced. + -- + -- XXX Alternatively, we can distribute the dir stream over multiple + -- concurrent folds and return (monadic output) a stream of arrays created + -- from the output channel, then consume that stream by using a monad bind. + bufSize = 4000 + + mkPath :: Array Word8 -> PosixPath + mkPath = Path.unsafeFromChunk + + copyToBuf dstArr pos dirPath name = do + nameLen <- fmap fromIntegral (liftIO $ c_strlen name) + let PosixPath (Array dirArr start end) = dirPath + dirLen = end - start + -- XXX We may need to decode and encode the path if the + -- output encoding differs from fs encoding. + -- + -- Account for separator and newline bytes. + byteCount = dirLen + nameLen + 2 + if pos + byteCount <= bufSize + then do + -- XXX append a path separator to a dir path + -- We know it is already pinned. + MutByteArray.unsafeAsPtr dstArr (\ptr -> liftIO $ do + MutByteArray.putSliceUnsafe dirArr start dstArr pos dirLen + let ptr1 = ptr `plusPtr` (pos + dirLen) + separator = 47 :: Word8 + poke ptr1 separator + let ptr2 = ptr1 `plusPtr` 1 + _ <- c_memcpy ptr2 (castPtr name) (fromIntegral nameLen) + let ptr3 = ptr2 `plusPtr` nameLen + newline = 10 :: Word8 + poke ptr3 newline + ) + return (Just (pos + byteCount)) + else return Nothing + + step _ ChunkStreamByteInit0 = do + mbarr <- liftIO $ MutByteArray.pinnedNew bufSize + return $ Skip (ChunkStreamByteInit alldirs [] 0 mbarr 0) + + step _ (ChunkStreamByteInit (x:xs) dirs ndirs mbarr pos) = do + DirStream dirp <- liftIO $ openDirStream x + return $ Skip (ChunkStreamByteLoop x xs dirp dirs ndirs mbarr pos) + + step _ (ChunkStreamByteInit [] [] _ _ pos) | pos == 0 = + return Stop + + step _ (ChunkStreamByteInit [] [] _ mbarr pos) = + return $ Yield (Right (Array mbarr 0 pos)) (ChunkStreamByteInit [] [] 0 mbarr 0) + + step _ (ChunkStreamByteInit [] dirs _ mbarr pos) = + return $ Yield (Left dirs) (ChunkStreamByteInit [] [] 0 mbarr pos) + + step _ (ChunkStreamByteLoopPending pending curdir xs dirp mbarr pos) = do + mbarr1 <- liftIO $ MutByteArray.pinnedNew bufSize + r1 <- copyToBuf mbarr1 0 curdir pending + case r1 of + Just pos2 -> + return $ Yield (Right (Array mbarr 0 pos)) + -- When we come in this state we have emitted dirs + (ChunkStreamByteLoop curdir xs dirp [] 0 mbarr1 pos2) + Nothing -> error "Dirname too big for bufSize" + + step _ st@(ChunkStreamByteLoop curdir xs dirp dirs ndirs mbarr pos) = do + liftIO resetErrno + dentPtr <- liftIO $ c_readdir dirp + if (dentPtr /= nullPtr) + then do + let dname = #{ptr struct dirent, d_name} dentPtr + dtype :: #{type unsigned char} <- + liftIO $ #{peek struct dirent, d_type} dentPtr + + -- XXX Skips come around the entire loop, does that impact perf + -- because it has a StreamK in the middle. + -- Keep the file check first as it is more likely + if (dtype /= (#const DT_DIR)) + then do + r <- copyToBuf mbarr pos curdir dname + case r of + Just pos1 -> + return $ Skip + (ChunkStreamByteLoop curdir xs dirp dirs ndirs mbarr pos1) + Nothing -> do + if ndirs > 0 + then + return $ Yield (Left dirs) + (ChunkStreamByteLoopPending dname curdir xs dirp mbarr pos) + else + return $ Skip + (ChunkStreamByteLoopPending dname curdir xs dirp mbarr pos) + else do + isMeta <- liftIO $ isMetaDir dname + if isMeta + then return $ Skip st + else do + name <- Array.fromCString (castPtr dname) + let path = Path.append curdir (mkPath name) + dirs1 = path : dirs + ndirs1 = ndirs + 1 + r <- copyToBuf mbarr pos curdir dname + case r of + Just pos1 -> + return $ Skip + (ChunkStreamByteLoop curdir xs dirp dirs1 ndirs1 mbarr pos1) + Nothing -> do + -- We know dirs1 in not empty here + return $ Yield (Left dirs1) + (ChunkStreamByteLoopPending dname curdir xs dirp mbarr pos) + else do + errno <- liftIO getErrno + if (errno == eINTR) + then return $ Skip st + else do + let (Errno n) = errno + liftIO $ closeDirStream (DirStream dirp) + if (n == 0) + then return $ Skip (ChunkStreamByteInit xs dirs ndirs mbarr pos) + else liftIO $ throwErrno "readEitherByteChunks" +#endif diff --git a/core/src/Streamly/Internal/FileSystem/Windows/ReadDir.hsc b/core/src/Streamly/Internal/FileSystem/Windows/ReadDir.hsc new file mode 100644 index 0000000000..d1f8672f53 --- /dev/null +++ b/core/src/Streamly/Internal/FileSystem/Windows/ReadDir.hsc @@ -0,0 +1,215 @@ +-- | +-- Module : Streamly.Internal.FileSystem.Windows.ReadDir +-- Copyright : (c) 2024 Composewell Technologies +-- +-- License : BSD3 +-- Maintainer : streamly@composewell.com +-- Portability : GHC + +module Streamly.Internal.FileSystem.Windows.ReadDir + ( +#if defined(mingw32_HOST_OS) || defined(__MINGW32__) + DirStream + , openDirStream + , closeDirStream + , readDirStreamEither +#endif + ) +where + +#if defined(mingw32_HOST_OS) || defined(__MINGW32__) + +import Control.Exception (throwIO) +import Control.Monad (void) +import Control.Monad.IO.Class (MonadIO(..)) +import Data.Char (ord, isSpace) +import Data.IORef (IORef, newIORef, readIORef, writeIORef) +import Foreign.C (CInt(..), CWchar(..), Errno(..), errnoToIOError, peekCWString) +import Numeric (showHex) +import Streamly.Internal.Data.Array (Array(..)) +import Streamly.Internal.FileSystem.WindowsPath (WindowsPath(..)) +import System.IO.Error (ioeSetErrorString) + +import qualified Streamly.Internal.Data.Array as Array +import qualified Streamly.Internal.FileSystem.WindowsPath as Path +import qualified System.Win32 as Win32 (failWith) + +import Foreign hiding (void) + +#include + +-- Note on A vs W suffix in APIs. +-- CreateFile vs. CreateFileW: CreateFile is a macro that expands to +-- CreateFileA or CreateFileW depending on whether Unicode support (UNICODE and +-- _UNICODE preprocessor macros) is enabled in your project. To ensure +-- consistent Unicode support, explicitly use CreateFileW. + +------------------------------------------------------------------------------ +-- Types +------------------------------------------------------------------------------ + +type BOOL = Bool +type DWORD = Word32 + +type UINT_PTR = Word +type ErrCode = DWORD +type LPCTSTR = Ptr CWchar +type WIN32_FIND_DATA = () +type HANDLE = Ptr () + +------------------------------------------------------------------------------ +-- Windows C APIs +------------------------------------------------------------------------------ + +-- XXX Note for i386, stdcall is needed instead of ccall, see Win32 +-- package/windows_cconv.h. We support only x86_64 for now. +foreign import ccall unsafe "windows.h FindFirstFileW" + c_FindFirstFileW :: LPCTSTR -> Ptr WIN32_FIND_DATA -> IO HANDLE + +foreign import ccall unsafe "windows.h FindNextFileW" + c_FindNextFileW :: HANDLE -> Ptr WIN32_FIND_DATA -> IO BOOL + +foreign import ccall unsafe "windows.h FindClose" + c_FindClose :: HANDLE -> IO BOOL + +foreign import ccall unsafe "windows.h GetLastError" + getLastError :: IO ErrCode + +foreign import ccall unsafe "windows.h LocalFree" + localFree :: Ptr a -> IO (Ptr a) + +------------------------------------------------------------------------------ +-- Haskell C APIs +------------------------------------------------------------------------------ + +foreign import ccall unsafe "maperrno_func" -- in base/cbits/Win32Utils.c + c_maperrno_func :: ErrCode -> IO Errno + +------------------------------------------------------------------------------ +-- Error Handling +------------------------------------------------------------------------------ + +-- XXX getErrorMessage and castUINTPtrToPtr require c code, so left out for +-- now. Once we replace these we can remove dependency on Win32. We can +-- possibly implement these in Haskell by directly calling the Windows API. + +foreign import ccall unsafe "getErrorMessage" + getErrorMessage :: DWORD -> IO (Ptr CWchar) + +foreign import ccall unsafe "castUINTPtrToPtr" + castUINTPtrToPtr :: UINT_PTR -> Ptr a + +failWith :: String -> ErrCode -> IO a +failWith fn_name err_code = do + c_msg <- getErrorMessage err_code + msg <- if c_msg == nullPtr + then return $ "Error 0x" ++ Numeric.showHex err_code "" + else do + msg <- peekCWString c_msg + -- We ignore failure of freeing c_msg, given we're already failing + _ <- localFree c_msg + return msg + errno <- c_maperrno_func err_code + let msg' = reverse $ dropWhile isSpace $ reverse msg -- drop trailing \n + ioerror = errnoToIOError fn_name errno Nothing Nothing + `ioeSetErrorString` msg' + throwIO ioerror + +errorWin :: String -> IO a +errorWin fn_name = do + err_code <- getLastError + failWith fn_name err_code + +failIf :: (a -> Bool) -> String -> IO a -> IO a +failIf p wh act = do + v <- act + if p v then errorWin wh else return v + +iNVALID_HANDLE_VALUE :: HANDLE +iNVALID_HANDLE_VALUE = castUINTPtrToPtr maxBound + +------------------------------------------------------------------------------ +-- Dir stream implementation +------------------------------------------------------------------------------ + +-- XXX Define this as data and unpack three fields? +newtype DirStream = + DirStream (HANDLE, IORef Bool, ForeignPtr WIN32_FIND_DATA) + +openDirStream :: WindowsPath -> IO DirStream +openDirStream p = do + let path = Path.unsafeAppend p $ Path.unsafeFromString "*" + fp_finddata <- mallocForeignPtrBytes (# const sizeof(WIN32_FIND_DATAW) ) + withForeignPtr fp_finddata $ \dataPtr -> do + handle <- + Array.asCStringUnsafe (Path.toChunk path) $ \pathPtr -> do + -- XXX Use getLastError to distinguish the case when no + -- matching file is found. See the doc of FindFirstFileW. + failIf + (== iNVALID_HANDLE_VALUE) + ("FindFirstFileW: " ++ Path.toString path) + $ c_FindFirstFileW (castPtr pathPtr) dataPtr + ref <- newIORef True + return $ DirStream (handle, ref, fp_finddata) + +closeDirStream :: DirStream -> IO () +closeDirStream (DirStream (h, _, _)) = void (c_FindClose h) + +-- XXX Keep this in sync with the isMetaDir function in Posix readdir module. +isMetaDir :: Ptr CWchar -> IO Bool +isMetaDir dname = do + -- XXX Assuming UTF16LE encoding + c1 <- peek dname + if (c1 /= fromIntegral (ord '.')) + then return False + else do + c2 :: Word8 <- peekByteOff dname 1 + if (c2 == 0) + then return True + else if (c2 /= fromIntegral (ord '.')) + then return False + else do + c3 :: Word8 <- peekByteOff dname 2 + if (c3 == 0) + then return True + else return False + +readDirStreamEither :: DirStream -> IO (Maybe (Either WindowsPath WindowsPath)) +readDirStreamEither (DirStream (h, ref, fdata)) = + withForeignPtr fdata $ \ptr -> do + firstTime <- readIORef ref + if firstTime + then do + writeIORef ref False + processEntry ptr + else findNext ptr + + where + + mkPath :: Array Word8 -> WindowsPath + mkPath = Path.unsafeFromChunk + + processEntry ptr = do + let dname = #{ptr WIN32_FIND_DATAW, cFileName} ptr + dattrs :: #{type DWORD} <- + #{peek WIN32_FIND_DATAW, dwFileAttributes} ptr + name <- Array.fromW16CString dname + if (dattrs .&. (#const FILE_ATTRIBUTE_DIRECTORY) /= 0) + then do + isMeta <- isMetaDir dname + if isMeta + then findNext ptr + else return (Just (Left (mkPath (Array.castUnsafe name)))) + else return (Just (Right (mkPath (Array.castUnsafe name)))) + + findNext ptr = do + retval <- liftIO $ c_FindNextFileW h ptr + if (retval) + then processEntry ptr + else do + err <- getLastError + if err == (# const ERROR_NO_MORE_FILES ) + then return Nothing + -- XXX Print the path in the error message + else Win32.failWith "findNextFile" err +#endif diff --git a/core/streamly-core.cabal b/core/streamly-core.cabal index d38b2adc3e..29614ba785 100644 --- a/core/streamly-core.cabal +++ b/core/streamly-core.cabal @@ -298,6 +298,8 @@ library , src/Streamly/Internal/Data/Array , src/Streamly/Internal/Data/Stream + -- Prefer OS conditionals inside the source files rather than here, + -- conditionals here do not work well with cabal2nix. if os(windows) c-sources: src/Streamly/Internal/Data/Time/Clock/Windows.c @@ -351,13 +353,11 @@ library -- streamly-core-streams , Streamly.Internal.Data.StreamK -- StreamD depends on streamly-array-types - , Streamly.Internal.Data.Stream.StreamD , Streamly.Internal.Data.Stream -- streamly-core-data , Streamly.Internal.Data.Builder , Streamly.Internal.Data.Unfold - , Streamly.Internal.Data.Fold.Chunked , Streamly.Internal.Data.Parser , Streamly.Internal.Data.ParserK , Streamly.Internal.Data.Pipe @@ -369,8 +369,6 @@ library -- streamly-core-data-arrays , Streamly.Internal.Data.Array.Generic , Streamly.Internal.Data.Array - , Streamly.Internal.Data.MutArray.Stream - , Streamly.Internal.Data.Array.Stream -- streamly-unicode-core , Streamly.Internal.Unicode.Stream @@ -397,7 +395,9 @@ library , Streamly.Internal.FileSystem.Handle , Streamly.Internal.FileSystem.File - , Streamly.Internal.FileSystem.Dir + , Streamly.Internal.FileSystem.DirIO + , Streamly.Internal.FileSystem.Posix.ReadDir + , Streamly.Internal.FileSystem.Windows.ReadDir -- Ring Arrays , Streamly.Internal.Data.Ring @@ -411,18 +411,15 @@ library -- , Streamly.Data.Binary.Encode -- Stream types -- Pre-release modules - -- , Streamly.Data.Fold.Window -- , Streamly.Data.Pipe -- , Streamly.Data.Array.Stream -- , Streamly.Data.Array.Fold -- , Streamly.Data.Array.Mut.Stream -- , Streamly.Data.Ring - -- , Streamly.Data.Ring.Unboxed + -- , Streamly.Data.Ring.Generic -- , Streamly.Data.IORef.Unboxed -- , Streamly.Data.List -- , Streamly.Data.Binary.Decode - -- , Streamly.FileSystem.File - -- , Streamly.FileSystem.Dir -- , Streamly.Data.Time.Units -- , Streamly.Data.Time.Clock -- , Streamly.Data.Tuple.Strict @@ -443,12 +440,25 @@ library , Streamly.Data.Stream , Streamly.Data.StreamK , Streamly.Data.Unfold - , Streamly.FileSystem.Dir + , Streamly.FileSystem.DirIO , Streamly.FileSystem.File , Streamly.FileSystem.Handle , Streamly.Unicode.Parser , Streamly.Unicode.Stream , Streamly.Unicode.String + + -- Deprecated in 0.3.0 + , Streamly.Internal.FileSystem.Dir + , Streamly.FileSystem.Dir + + -- Deprecated in 0.2.0 + , Streamly.Internal.Data.MutArray.Stream + , Streamly.Internal.Data.Array.Stream + , Streamly.Internal.Data.Stream.StreamD + , Streamly.Internal.Data.Fold.Chunked + + -- Only those modules should be here which are fully re-exported via some + -- other module. other-modules: Streamly.Internal.Data.Fold.Step , Streamly.Internal.Data.Fold.Type diff --git a/src/Streamly/Internal/FileSystem/Event/Linux.hs b/src/Streamly/Internal/FileSystem/Event/Linux.hs index d2cc613870..beed036072 100644 --- a/src/Streamly/Internal/FileSystem/Event/Linux.hs +++ b/src/Streamly/Internal/FileSystem/Event/Linux.hs @@ -178,6 +178,7 @@ import System.IO (Handle, hClose, IOMode(ReadMode)) import GHC.IO.Handle.FD (handleToFd) import Streamly.Internal.Data.Array (Array(..), byteLength) +import Streamly.Internal.FileSystem.Path (Path) import qualified Data.IntMap.Lazy as Map import qualified Data.List.NonEmpty as NonEmpty @@ -186,12 +187,13 @@ import qualified Streamly.Data.Array as A (fromList, writeN, getIndex) import qualified Streamly.Data.Stream as S import qualified Streamly.FileSystem.Handle as FH import qualified Streamly.Unicode.Stream as U +import qualified Streamly.Internal.FileSystem.Path as Path import qualified Streamly.Internal.Data.Array as A - ( fromStream, asCStringUnsafe, unsafePinnedAsPtr + ( asCStringUnsafe, unsafePinnedAsPtr , getSliceUnsafe, read ) -import qualified Streamly.Internal.FileSystem.Dir as Dir (readDirs) +import qualified Streamly.Internal.FileSystem.DirIO as Dir (readDirs) import qualified Streamly.Internal.Data.Parser as PR (takeEQ, fromEffect, fromFold) @@ -615,8 +617,8 @@ foreign import ccall unsafe -- separated bytes. So these may fail or convert the path in an unexpected -- manner. We should ultimately remove all usage of these. -toUtf8 :: MonadIO m => String -> m (Array Word8) -toUtf8 = A.fromStream . U.encodeUtf8 . S.fromList +toUtf8 :: MonadIO m => Path -> m (Array Word8) +toUtf8 path = pure $ Path.toChunk path utf8ToString :: Array Word8 -> String utf8ToString = runIdentity . S.fold FL.toList . U.decodeUtf8' . A.read @@ -716,12 +718,14 @@ addToWatch cfg@Config{..} watch0@(Watch handle wdMap) root0 path0 = do -- -- XXX readDirs currently uses paths as String, we need to convert it -- to "/" separated by byte arrays. - pathIsDir <- doesDirectoryExist $ utf8ToString absPath + let p = Path.unsafeFromChunk absPath + -- XXX Need a FileSystem.Stat module to remove this + pathIsDir <- doesDirectoryExist (Path.toString p) when (watchRec && pathIsDir) $ do let f = addToWatch cfg watch0 root . appendPaths path in S.fold (FL.drainMapM f) $ S.mapM toUtf8 - $ Dir.readDirs $ utf8ToString absPath + $ Dir.readDirs p foreign import ccall unsafe "sys/inotify.h inotify_rm_watch" c_inotify_rm_watch diff --git a/stack.yaml b/stack.yaml index fea7ad4e21..cc14d5c457 100644 --- a/stack.yaml +++ b/stack.yaml @@ -1,4 +1,4 @@ -resolver: lts-20.13 +resolver: lts-22.0 packages: - '.' - './benchmark'