diff --git a/core/docs/Changelog.md b/core/docs/Changelog.md
index bd8386003a..2c29bee104 100644
--- a/core/docs/Changelog.md
+++ b/core/docs/Changelog.md
@@ -6,6 +6,12 @@
   - Streamly.Data.Stream.isInfixOf
   - Streamly.Data.Array.writeLastN
 
+* `Streamly.FileSystem.Dir` module is deprecated and replaced by
+  `Streamly.FileSystem.DirIO` module. The new module has exact same
+  APIs except that it uses the streamly native `Path` type instead
+  of `FilePath` for path representation. The new implementation is
+  significantly faster.
+
 ### Internal API Changes
 
 * Remove the `Storable` constraint from several functions involving the ring
diff --git a/core/src/DocTestDataStream.hs b/core/src/DocTestDataStream.hs
index 727f98a15d..56f9843113 100644
--- a/core/src/DocTestDataStream.hs
+++ b/core/src/DocTestDataStream.hs
@@ -26,7 +26,7 @@
 >>> import qualified Streamly.Data.StreamK as StreamK
 >>> import qualified Streamly.Data.Unfold as Unfold
 >>> import qualified Streamly.Data.Parser as Parser
->>> import qualified Streamly.FileSystem.Dir as Dir
+>>> import qualified Streamly.FileSystem.DirIO as Dir
 
 For APIs that have not been released yet.
 
@@ -35,5 +35,5 @@ For APIs that have not been released yet.
 >>> import qualified Streamly.Internal.Data.Stream as Stream
 >>> import qualified Streamly.Internal.Data.StreamK as StreamK
 >>> import qualified Streamly.Internal.Data.Unfold as Unfold
->>> import qualified Streamly.Internal.FileSystem.Dir as Dir
+>>> import qualified Streamly.Internal.FileSystem.DirIO as Dir
 -}
diff --git a/core/src/DocTestDataStreamK.hs b/core/src/DocTestDataStreamK.hs
index a8dd848d3e..b5a80df643 100644
--- a/core/src/DocTestDataStreamK.hs
+++ b/core/src/DocTestDataStreamK.hs
@@ -12,10 +12,10 @@
 >>> import qualified Streamly.Data.Parser as Parser
 >>> import qualified Streamly.Data.Stream as Stream
 >>> import qualified Streamly.Data.StreamK as StreamK
->>> import qualified Streamly.FileSystem.Dir as Dir
+>>> import qualified Streamly.FileSystem.DirIO as Dir
 
 For APIs that have not been released yet.
 
 >>> import qualified Streamly.Internal.Data.StreamK as StreamK
->>> import qualified Streamly.Internal.FileSystem.Dir as Dir
+>>> import qualified Streamly.Internal.FileSystem.DirIO as Dir
 -}
diff --git a/core/src/Streamly/FileSystem/Dir.hs b/core/src/Streamly/FileSystem/Dir.hs
index 7e493cd2cb..b9afd08896 100644
--- a/core/src/Streamly/FileSystem/Dir.hs
+++ b/core/src/Streamly/FileSystem/Dir.hs
@@ -1,3 +1,4 @@
+{-# OPTIONS_GHC -Wno-deprecations #-}
 -- |
 -- Module      : Streamly.FileSystem.Dir
 -- Copyright   : (c) 2018 Composewell Technologies
@@ -12,6 +13,7 @@
 -- something else.
 
 module Streamly.FileSystem.Dir
+{-# DEPRECATED "Please use \"Streamly.FileSystem.DirIO\" instead." #-}
     (
     -- * Streams
       read
diff --git a/core/src/Streamly/FileSystem/DirIO.hs b/core/src/Streamly/FileSystem/DirIO.hs
new file mode 100644
index 0000000000..224165fc7a
--- /dev/null
+++ b/core/src/Streamly/FileSystem/DirIO.hs
@@ -0,0 +1,24 @@
+-- |
+-- Module      : Streamly.FileSystem.DirIO
+-- Copyright   : (c) 2018 Composewell Technologies
+--
+-- License     : BSD3
+-- Maintainer  : streamly@composewell.com
+-- Stability   : pre-release
+-- Portability : GHC
+--
+-- High performance and streaming APIs for reading directories.
+--
+-- >>> import qualified Streamly.FileSystem.DirIO as Dir
+--
+
+module Streamly.FileSystem.DirIO
+    (
+    -- * Streams
+      read
+    , readEither
+    )
+where
+
+import Streamly.Internal.FileSystem.DirIO
+import Prelude hiding (read)
diff --git a/core/src/Streamly/Internal/FileSystem/Dir.hs b/core/src/Streamly/Internal/FileSystem/Dir.hs
index f8588467f3..5080ee0d55 100644
--- a/core/src/Streamly/Internal/FileSystem/Dir.hs
+++ b/core/src/Streamly/Internal/FileSystem/Dir.hs
@@ -9,6 +9,7 @@
 -- Portability : GHC
 
 module Streamly.Internal.FileSystem.Dir
+{-# DEPRECATED "Please use \"Streamly.Internal.FileSystem.DirIO\" instead." #-}
     (
     -- * Streams
       read
diff --git a/core/src/Streamly/Internal/FileSystem/DirIO.hs b/core/src/Streamly/Internal/FileSystem/DirIO.hs
new file mode 100644
index 0000000000..f91ffb1be9
--- /dev/null
+++ b/core/src/Streamly/Internal/FileSystem/DirIO.hs
@@ -0,0 +1,510 @@
+#include "inline.hs"
+
+-- |
+-- Module      : Streamly.Internal.FileSystem.DirIO
+-- Copyright   : (c) 2018 Composewell Technologies
+--
+-- License     : BSD3
+-- Maintainer  : streamly@composewell.com
+-- Portability : GHC
+
+module Streamly.Internal.FileSystem.DirIO
+    (
+    -- * Streams
+      read
+
+    -- read not just the names but also the inode attrs of the children. This
+    -- abstraction makes sense because when we read the dir contents we also
+    -- get the inodes, and it is cheaper to get the attrs from the inodes
+    -- instead of resolving the paths and get those. This abstraction may be
+    -- less portable as different platforms may have different attrs. To
+    -- optimize, we can also add a filter/pattern/parser on the names of the
+    -- children that we want to read. We can call that readAttrsWith? Or just
+    -- have the default readAttrs do that? Usually we won't need that, so it
+    -- may be better to keep that a separate API.
+    -- , readAttrs
+
+    -- recursive read requires us to read the attributes of the children to
+    -- determine if something is a directory or not. Therefore, it may be a good
+    -- idea to have a low level routine that also spits out the attributes of
+    -- the files, we get that for free. We can also add a filter/pattern/parser
+    -- on the names of the children that we want to read.
+    --, readAttrsRecursive -- Options: acyclic, follow symlinks
+    , readFiles
+    , readDirs
+    , readEither
+    , readEitherPaths
+    , readEitherChunks
+
+    -- We can implement this in terms of readAttrsRecursive without losing
+    -- perf.
+    -- , readEitherRecursive -- Options: acyclic, follow symlinks
+    -- , readAncestors -- read the parent chain using the .. entry.
+    -- , readAncestorsAttrs
+
+    -- * Unfolds
+    -- | Use the more convenient stream APIs instead of unfolds where possible.
+    , reader
+    , fileReader
+    , dirReader
+    , eitherReader
+    , eitherReaderPaths
+
+      {-
+    , toStreamWithBufferOf
+
+    , readChunks
+    , readChunksWithBufferOf
+
+    , toChunksWithBufferOf
+    , toChunks
+
+    , write
+    , writeWithBufferOf
+
+    -- Byte stream write (Streams)
+    , fromStream
+    , fromStreamWithBufferOf
+
+    -- -- * Array Write
+    , writeArray
+    , writeChunks
+    , writeChunksWithBufferOf
+
+    -- -- * Array stream Write
+    , fromChunks
+    , fromChunksWithBufferOf
+    -}
+    )
+where
+
+import Control.Monad.Catch (MonadCatch)
+import Control.Monad.IO.Class (MonadIO(..))
+import Data.Bifunctor (bimap)
+import Data.Either (isRight, isLeft, fromLeft, fromRight)
+import Streamly.Data.Stream (Stream)
+import Streamly.Internal.Data.Unfold (Step(..))
+import Streamly.Internal.Data.Unfold.Type (Unfold(..))
+import Streamly.Internal.FileSystem.Path (Path)
+#if defined(mingw32_HOST_OS) || defined(__MINGW32__)
+import qualified Streamly.Internal.Data.Fold as Fold
+import Streamly.Internal.FileSystem.Windows.ReadDir
+    (DirStream, openDirStream, closeDirStream, readDirStreamEither)
+#else
+import Streamly.Internal.FileSystem.Posix.ReadDir
+    ( DirStream, openDirStream, closeDirStream, readDirStreamEither
+    , readEitherChunks)
+#endif
+import qualified Streamly.Internal.Data.Stream as S
+import qualified Streamly.Data.Unfold as UF
+import qualified Streamly.Internal.Data.Unfold as UF (mapM2, bracketIO)
+import qualified Streamly.Internal.FileSystem.Path as Path
+
+import Prelude hiding (read)
+
+{-
+{-# INLINABLE readArrayUpto #-}
+readArrayUpto :: Int -> Handle -> IO (Array Word8)
+readArrayUpto size h = do
+    ptr <- mallocPlainForeignPtrBytes size
+    -- ptr <- mallocPlainForeignPtrAlignedBytes size (alignment (undefined :: Word8))
+    withForeignPtr ptr $ \p -> do
+        n <- hGetBufSome h p size
+        let v = Array
+                { aStart = ptr
+                , arrEnd   = p `plusPtr` n
+                , arrBound = p `plusPtr` size
+                }
+        -- XXX shrink only if the diff is significant
+        shrinkToFit v
+
+-------------------------------------------------------------------------------
+-- Stream of Arrays IO
+-------------------------------------------------------------------------------
+
+-- | @toChunksWithBufferOf size h@ reads a stream of arrays from file handle @h@.
+-- The maximum size of a single array is specified by @size@. The actual size
+-- read may be less than or equal to @size@.
+{-# INLINE _toChunksWithBufferOf #-}
+_toChunksWithBufferOf :: MonadIO m => Int -> Handle -> Stream m (Array Word8)
+_toChunksWithBufferOf size h = go
+  where
+    -- XXX use cons/nil instead
+    go = mkStream $ \_ yld _ stp -> do
+        arr <- liftIO $ readArrayUpto size h
+        if A.length arr == 0
+        then stp
+        else yld arr go
+
+-- | @toChunksWithBufferOf size handle@ reads a stream of arrays from the file
+-- handle @handle@.  The maximum size of a single array is limited to @size@.
+-- The actual size read may be less than or equal to @size@.
+--
+-- @since 0.7.0
+{-# INLINE_NORMAL toChunksWithBufferOf #-}
+toChunksWithBufferOf :: MonadIO m => Int -> Handle -> Stream m (Array Word8)
+toChunksWithBufferOf size h = D.fromStreamD (D.Stream step ())
+  where
+    {-# INLINE_LATE step #-}
+    step _ _ = do
+        arr <- liftIO $ readArrayUpto size h
+        return $
+            case A.length arr of
+                0 -> D.Stop
+                _ -> D.Yield arr ()
+
+-- | Unfold the tuple @(bufsize, handle)@ into a stream of 'Word8' arrays.
+-- Read requests to the IO device are performed using a buffer of size
+-- @bufsize@.  The size of an array in the resulting stream is always less than
+-- or equal to @bufsize@.
+--
+-- @since 0.7.0
+{-# INLINE_NORMAL readChunksWithBufferOf #-}
+readChunksWithBufferOf :: MonadIO m => Unfold m (Int, Handle) (Array Word8)
+readChunksWithBufferOf = Unfold step return
+    where
+    {-# INLINE_LATE step #-}
+    step (size, h) = do
+        arr <- liftIO $ readArrayUpto size h
+        return $
+            case A.length arr of
+                0 -> D.Stop
+                _ -> D.Yield arr (size, h)
+
+-- XXX read 'Array a' instead of Word8
+
+-- | @toChunks handle@ reads a stream of arrays from the specified file
+-- handle.  The maximum size of a single array is limited to
+-- @defaultChunkSize@. The actual size read may be less than or equal to
+-- @defaultChunkSize@.
+--
+-- > toChunks = toChunksWithBufferOf defaultChunkSize
+--
+-- @since 0.7.0
+{-# INLINE toChunks #-}
+toChunks :: MonadIO m => Handle -> Stream m (Array Word8)
+toChunks = toChunksWithBufferOf defaultChunkSize
+
+-- | Unfolds a handle into a stream of 'Word8' arrays. Requests to the IO
+-- device are performed using a buffer of size
+-- 'Streamly.Internal.Data.Array.Type.defaultChunkSize'. The
+-- size of arrays in the resulting stream are therefore less than or equal to
+-- 'Streamly.Internal.Data.Array.Type.defaultChunkSize'.
+--
+-- @since 0.7.0
+{-# INLINE readChunks #-}
+readChunks :: MonadIO m => Unfold m Handle (Array Word8)
+readChunks = UF.first readChunksWithBufferOf defaultChunkSize
+
+-------------------------------------------------------------------------------
+-- Read a Directory to Stream
+-------------------------------------------------------------------------------
+
+-- TODO for concurrent streams implement readahead IO. We can send multiple
+-- read requests at the same time. For serial case we can use async IO. We can
+-- also control the read throughput in mbps or IOPS.
+
+-- | Unfolds the tuple @(bufsize, handle)@ into a byte stream, read requests
+-- to the IO device are performed using buffers of @bufsize@.
+--
+-- @since 0.7.0
+{-# INLINE readWithBufferOf #-}
+readWithBufferOf :: MonadIO m => Unfold m (Int, Handle) Word8
+readWithBufferOf = UF.many readChunksWithBufferOf A.read
+
+-- | @toStreamWithBufferOf bufsize handle@ reads a byte stream from a file
+-- handle, reads are performed in chunks of up to @bufsize@.
+--
+-- /Pre-release/
+{-# INLINE toStreamWithBufferOf #-}
+toStreamWithBufferOf :: MonadIO m => Int -> Handle -> Stream m Word8
+toStreamWithBufferOf chunkSize h = AS.concat $ toChunksWithBufferOf chunkSize h
+-}
+
+-- read child node names from a dir filtering out . and ..
+--
+-- . and .. are an implementation artifact, and should probably not be used in
+-- user level abstractions.
+--
+-- . does not seem to have any useful purpose. If we have the path of the dir
+-- then we will resolve it to get the inode of the dir so the . entry would be
+-- redundant. If we have the inode of the dir to read the dir then it is
+-- redundant. Is this for cross check when doing fsck?
+--
+-- For .. we have the readAncestors API, we should not have this in the
+-- readChildren API.
+
+-- XXX exception handling
+
+{-# INLINE streamEitherReader #-}
+streamEitherReader :: MonadIO m =>
+    Unfold m DirStream (Either Path Path)
+streamEitherReader = Unfold step return
+    where
+
+    step strm = do
+        r <- liftIO $ readDirStreamEither strm
+        case r of
+            Nothing -> return Stop
+            Just x -> return $ Yield x strm
+
+{-# INLINE streamReader #-}
+streamReader :: MonadIO m => Unfold m DirStream Path
+streamReader = fmap (either id id) streamEitherReader
+
+--  | Read a directory emitting a stream with names of the children. Filter out
+--  "." and ".." entries.
+--
+--  /Internal/
+
+{-# INLINE reader #-}
+reader :: (MonadIO m, MonadCatch m) => Unfold m Path Path
+reader =
+-- XXX Instead of using bracketIO for each iteration of the loop we should
+-- instead yield a buffer of dir entries in each iteration and then use an
+-- unfold and concat to flatten those entries. That should improve the
+-- performance.
+      UF.bracketIO openDirStream closeDirStream streamReader
+
+-- XXX We can use a more general mechanism to filter the contents of a
+-- directory. We can just stat each child and pass on the stat information. We
+-- can then use that info to do a general filtering. "find" like filters can be
+-- created.
+
+-- | Read directories as Left and files as Right. Filter out "." and ".."
+-- entries.
+--
+--  /Internal/
+--
+{-# INLINE eitherReader #-}
+eitherReader :: (MonadIO m, MonadCatch m) =>
+    Unfold m Path (Either Path Path)
+eitherReader =
+    -- XXX The measured overhead of bracketIO is not noticeable, if it turns
+    -- out to be a problem for small filenames we can use getdents64 to use
+    -- chunked read to avoid the overhead.
+      UF.bracketIO openDirStream closeDirStream streamEitherReader
+
+{-# INLINE eitherReaderPaths #-}
+eitherReaderPaths ::(MonadIO m, MonadCatch m) =>
+    Unfold m Path (Either Path Path)
+eitherReaderPaths =
+    let (</>) = Path.append
+     in UF.mapM2 (\dir -> return . bimap (dir </>) (dir </>)) eitherReader
+
+--
+-- | Read files only.
+--
+--  /Internal/
+--
+{-# INLINE fileReader #-}
+fileReader :: (MonadIO m, MonadCatch m) => Unfold m Path Path
+fileReader = fmap (fromRight undefined) $ UF.filter isRight eitherReader
+
+-- | Read directories only. Filter out "." and ".." entries.
+--
+--  /Internal/
+--
+{-# INLINE dirReader #-}
+dirReader :: (MonadIO m, MonadCatch m) => Unfold m Path Path
+dirReader = fmap (fromLeft undefined) $ UF.filter isLeft eitherReader
+
+-- | Raw read of a directory.
+--
+-- /Pre-release/
+{-# INLINE read #-}
+read :: (MonadIO m, MonadCatch m) => Path -> Stream m Path
+read = S.unfold reader
+
+-- | Read directories as Left and files as Right. Filter out "." and ".."
+-- entries. The output contains the names of the directories and files.
+--
+-- /Pre-release/
+{-# INLINE readEither #-}
+readEither :: (MonadIO m, MonadCatch m) => Path -> Stream m (Either Path Path)
+readEither = S.unfold eitherReader
+
+-- | Like 'readEither' but prefix the names of the files and directories with
+-- the supplied directory path.
+{-# INLINE readEitherPaths #-}
+readEitherPaths :: (MonadIO m, MonadCatch m) =>
+    Path -> Stream m (Either Path Path)
+readEitherPaths dir =
+    let (</>) = Path.append
+     in fmap (bimap (dir </>) (dir </>)) $ readEither dir
+
+#if defined(mingw32_HOST_OS) || defined(__MINGW32__)
+-- XXX Implement a custom version of readEitherChunks (like for Posix) for
+-- windows as well. Also implement readEitherByteChunks.
+--
+-- XXX For a fast custom implementation of traversal, the Right could be the
+-- final array chunk including all files and dirs to be written to IO. The Left
+-- could be list of dirs to be traversed.
+--
+-- This is a generic (but slower?) version of readEitherChunks using
+-- eitherReaderPaths.
+{-# INLINE readEitherChunks #-}
+readEitherChunks :: (MonadIO m, MonadCatch m) =>
+    [Path] -> Stream m (Either [Path] [Path])
+readEitherChunks dirs =
+    -- XXX Need to use a take to limit the group size. There will be separate
+    -- limits for dir and files groups.
+     S.groupsWhile grouper collector
+        $ S.unfoldMany eitherReaderPaths
+        $ S.fromList dirs
+
+    where
+
+    -- XXX We can use a refold "Either dirs files" and yield the one that fills
+    -- and pass the remainder to the next Refold.
+    grouper first next =
+        case first of
+            Left _ -> isLeft next
+            Right _ -> isRight next
+
+    collector = Fold.foldl' step (Right [])
+
+    step b x =
+        case x of
+            Left x1 ->
+                case b of
+                    Right _ -> Left [x1] -- initial
+                    _ -> either (\xs -> Left (x1:xs)) Right b
+            Right x1 -> fmap (x1:) b
+#endif
+
+-- | Read files only.
+--
+--  /Internal/
+--
+{-# INLINE readFiles #-}
+readFiles :: (MonadIO m, MonadCatch m) => Path -> Stream m Path
+readFiles = S.unfold fileReader
+
+-- | Read directories only.
+--
+--  /Internal/
+--
+{-# INLINE readDirs #-}
+readDirs :: (MonadIO m, MonadCatch m) => Path -> Stream m Path
+readDirs = S.unfold dirReader
+
+{-
+-------------------------------------------------------------------------------
+-- Writing
+-------------------------------------------------------------------------------
+
+-------------------------------------------------------------------------------
+-- Array IO (output)
+-------------------------------------------------------------------------------
+
+-- | Write an 'Array' to a file handle.
+--
+-- @since 0.7.0
+{-# INLINABLE writeArray #-}
+writeArray :: Storable a => Handle -> Array a -> IO ()
+writeArray _ arr | A.length arr == 0 = return ()
+writeArray h Array{..} = withForeignPtr aStart $ \p -> hPutBuf h p aLen
+    where
+    aLen =
+        let p = unsafeForeignPtrToPtr aStart
+        in arrEnd `minusPtr` p
+
+-------------------------------------------------------------------------------
+-- Stream of Arrays IO
+-------------------------------------------------------------------------------
+
+-------------------------------------------------------------------------------
+-- Writing
+-------------------------------------------------------------------------------
+
+-- | Write a stream of arrays to a handle.
+--
+-- @since 0.7.0
+{-# INLINE fromChunks #-}
+fromChunks :: (MonadIO m, Storable a)
+    => Handle -> Stream m (Array a) -> m ()
+fromChunks h m = S.mapM_ (liftIO . writeArray h) m
+
+-- | @fromChunksWithBufferOf bufsize handle stream@ writes a stream of arrays
+-- to @handle@ after coalescing the adjacent arrays in chunks of @bufsize@.
+-- The chunk size is only a maximum and the actual writes could be smaller as
+-- we do not split the arrays to fit exactly to the specified size.
+--
+-- @since 0.7.0
+{-# INLINE fromChunksWithBufferOf #-}
+fromChunksWithBufferOf :: (MonadIO m, Storable a)
+    => Int -> Handle -> Stream m (Array a) -> m ()
+fromChunksWithBufferOf n h xs = fromChunks h $ AS.compact n xs
+
+-- | @fromStreamWithBufferOf bufsize handle stream@ writes @stream@ to @handle@
+-- in chunks of @bufsize@.  A write is performed to the IO device as soon as we
+-- collect the required input size.
+--
+-- @since 0.7.0
+{-# INLINE fromStreamWithBufferOf #-}
+fromStreamWithBufferOf :: MonadIO m => Int -> Handle -> Stream m Word8 -> m ()
+fromStreamWithBufferOf n h m = fromChunks h $ S.pinnedChunksOf n m
+-- fromStreamWithBufferOf n h m = fromChunks h $ AS.chunksOf n m
+
+-- > write = 'writeWithBufferOf' A.defaultChunkSize
+--
+-- | Write a byte stream to a file handle. Accumulates the input in chunks of
+-- up to 'Streamly.Internal.Data.Array.Type.defaultChunkSize' before writing.
+--
+-- NOTE: This may perform better than the 'write' fold, you can try this if you
+-- need some extra perf boost.
+--
+-- @since 0.7.0
+{-# INLINE fromStream #-}
+fromStream :: MonadIO m => Handle -> Stream m Word8 -> m ()
+fromStream = fromStreamWithBufferOf defaultChunkSize
+
+-- | Write a stream of arrays to a handle. Each array in the stream is written
+-- to the device as a separate IO request.
+--
+-- @since 0.7.0
+{-# INLINE writeChunks #-}
+writeChunks :: (MonadIO m, Storable a) => Handle -> Fold m (Array a) ()
+writeChunks h = FL.drainBy (liftIO . writeArray h)
+
+-- | @writeChunksWithBufferOf bufsize handle@ writes a stream of arrays
+-- to @handle@ after coalescing the adjacent arrays in chunks of @bufsize@.
+-- We never split an array, if a single array is bigger than the specified size
+-- it emitted as it is. Multiple arrays are coalesed as long as the total size
+-- remains below the specified size.
+--
+-- @since 0.7.0
+{-# INLINE writeChunksWithBufferOf #-}
+writeChunksWithBufferOf :: (MonadIO m, Storable a)
+    => Int -> Handle -> Fold m (Array a) ()
+writeChunksWithBufferOf n h = lpackArraysChunksOf n (writeChunks h)
+
+-- GHC buffer size dEFAULT_FD_BUFFER_SIZE=8192 bytes.
+--
+-- XXX test this
+-- Note that if you use a chunk size less than 8K (GHC's default buffer
+-- size) then you are advised to use 'NOBuffering' mode on the 'Handle' in case you
+-- do not want buffering to occur at GHC level as well. Same thing applies to
+-- writes as well.
+
+-- | @writeWithBufferOf reqSize handle@ writes the input stream to @handle@.
+-- Bytes in the input stream are collected into a buffer until we have a chunk
+-- of @reqSize@ and then written to the IO device.
+--
+-- @since 0.7.0
+{-# INLINE writeWithBufferOf #-}
+writeWithBufferOf :: MonadIO m => Int -> Handle -> Fold m Word8 ()
+writeWithBufferOf n h = FL.groupsOf n (pinnedWriteNUnsafe n) (writeChunks h)
+
+-- > write = 'writeWithBufferOf' A.defaultChunkSize
+--
+-- | Write a byte stream to a file handle. Accumulates the input in chunks of
+-- up to 'Streamly.Internal.Data.Array.Type.defaultChunkSize' before writing
+-- to the IO device.
+--
+-- @since 0.7.0
+{-# INLINE write #-}
+write :: MonadIO m => Handle -> Fold m Word8 ()
+write = writeWithBufferOf defaultChunkSize
+-}
diff --git a/core/src/Streamly/Internal/FileSystem/Posix/ReadDir.hsc b/core/src/Streamly/Internal/FileSystem/Posix/ReadDir.hsc
new file mode 100644
index 0000000000..b943a11393
--- /dev/null
+++ b/core/src/Streamly/Internal/FileSystem/Posix/ReadDir.hsc
@@ -0,0 +1,445 @@
+-- |
+-- Module      : Streamly.Internal.FileSystem.Posix.ReadDir
+-- Copyright   : (c) 2024 Composewell Technologies
+--
+-- License     : BSD3
+-- Maintainer  : streamly@composewell.com
+-- Portability : GHC
+
+module Streamly.Internal.FileSystem.Posix.ReadDir
+    (
+#if !defined(mingw32_HOST_OS) && !defined(__MINGW32__)
+      DirStream
+    , openDirStream
+    , closeDirStream
+    , readDirStreamEither
+    , readEitherChunks
+    , readEitherByteChunks
+#endif
+    )
+where
+
+#if !defined(mingw32_HOST_OS) && !defined(__MINGW32__)
+import Control.Monad.IO.Class (MonadIO(..))
+import Data.Char (ord)
+import Foreign (Ptr, Word8, nullPtr, peek, peekByteOff, castPtr, plusPtr)
+import Foreign.C
+    (resetErrno, Errno(..), getErrno, eINTR, throwErrno
+    , throwErrnoIfMinus1Retry_, CInt(..), CString, CChar, CSize(..))
+import Foreign.C.Error (errnoToIOError)
+import Foreign.Storable (poke)
+import Fusion.Plugin.Types (Fuse(..))
+import Streamly.Internal.Data.Array (Array(..))
+import Streamly.Internal.Data.MutByteArray (MutByteArray)
+import Streamly.Internal.FileSystem.PosixPath (PosixPath(..))
+import Streamly.Internal.Data.Stream (Stream(..), Step(..))
+
+import qualified Streamly.Internal.Data.Array as Array
+import qualified Streamly.Internal.Data.MutByteArray as MutByteArray
+import qualified Streamly.Internal.FileSystem.PosixPath as Path
+
+#include <dirent.h>
+
+-------------------------------------------------------------------------------
+-- From unix
+-------------------------------------------------------------------------------
+
+-- | as 'throwErrno', but exceptions include the given path when appropriate.
+--
+throwErrnoPath :: String -> PosixPath -> IO a
+throwErrnoPath loc path =
+  do
+    errno <- getErrno
+    -- XXX toString uses strict decoding, may fail
+    ioError (errnoToIOError loc errno Nothing (Just (Path.toString path)))
+
+throwErrnoPathIfRetry :: (a -> Bool) -> String -> PosixPath -> IO a -> IO a
+throwErrnoPathIfRetry pr loc rpath f =
+  do
+    res <- f
+    if pr res
+      then do
+        err <- getErrno
+        if err == eINTR
+          then throwErrnoPathIfRetry pr loc rpath f
+          else throwErrnoPath loc rpath
+      else return res
+
+throwErrnoPathIfNullRetry :: String -> PosixPath -> IO (Ptr a) -> IO (Ptr a)
+throwErrnoPathIfNullRetry loc path f =
+  throwErrnoPathIfRetry (== nullPtr) loc path f
+
+-------------------------------------------------------------------------------
+-- import System.Posix.Directory (closeDirStream)
+-- import System.Posix.Directory.Internals (DirStream(..), CDir, CDirent)
+-- requires unix >= 2.8
+-------------------------------------------------------------------------------
+
+data {-# CTYPE "DIR" #-} CDir
+data {-# CTYPE "struct dirent" #-} CDirent
+
+newtype DirStream = DirStream (Ptr CDir)
+
+-------------------------------------------------------------------------------
+
+foreign import ccall unsafe "closedir"
+   c_closedir :: Ptr CDir -> IO CInt
+
+foreign import capi unsafe "dirent.h opendir"
+    c_opendir :: CString  -> IO (Ptr CDir)
+
+-- XXX The "unix" package uses a wrapper over readdir __hscore_readdir (see
+-- cbits/HsUnix.c in unix package) which uses readdir_r in some cases where
+-- readdir is not known to be re-entrant. We are not doing that here. We are
+-- assuming that readdir is re-entrant which may not be the case on some old
+-- unix systems.
+foreign import ccall unsafe "dirent.h readdir"
+    c_readdir  :: Ptr CDir -> IO (Ptr CDirent)
+
+-- XXX Use openat instead of open so that we do not have to build and resolve
+-- absolute paths.
+--
+-- XXX Path is not null terminated therefore we need to make a copy even if the
+-- array is pinned.
+-- {-# INLINE openDirStream #-}
+openDirStream :: PosixPath -> IO DirStream
+openDirStream p =
+  Array.asCStringUnsafe (Path.toChunk p) $ \s -> do
+    dirp <- throwErrnoPathIfNullRetry "openDirStream" p $ c_opendir s
+    return (DirStream dirp)
+
+-- | @closeDirStream dp@ calls @closedir@ to close
+--   the directory stream @dp@.
+closeDirStream :: DirStream -> IO ()
+closeDirStream (DirStream dirp) = do
+  throwErrnoIfMinus1Retry_ "closeDirStream" (c_closedir dirp)
+
+isMetaDir :: Ptr CChar -> IO Bool
+isMetaDir dname = do
+    -- XXX Assuming an encoding that maps "." to ".", this is true for
+    -- UTF8.
+    c1 <- peek dname
+    if (c1 /= fromIntegral (ord '.'))
+    then return False
+    else do
+        c2 :: Word8 <- peekByteOff dname 1
+        if (c2 == 0)
+        then return True
+        else if (c2 /= fromIntegral (ord '.'))
+        then return False
+        else do
+            c3 :: Word8 <- peekByteOff dname 2
+            if (c3 == 0)
+            then return True
+            else return False
+
+-- XXX We can use getdents64 directly so that we can use array slices from the
+-- same buffer that we passed to the OS. That way we can also avoid any
+-- overhead of bracket.
+-- XXX Make this as Unfold to avoid returning Maybe
+-- XXX Or NOINLINE some parts and inline the rest to fuse it
+-- {-# INLINE readDirStreamEither #-}
+readDirStreamEither ::
+    -- DirStream -> IO (Either (Rel (Dir Path)) (Rel (File Path)))
+    DirStream -> IO (Maybe (Either PosixPath PosixPath))
+readDirStreamEither (DirStream dirp) = loop
+
+  where
+
+  -- mkPath :: IsPath (Rel (a Path)) => Array Word8 -> Rel (a Path)
+  -- {-# INLINE mkPath #-}
+  mkPath :: Array Word8 -> PosixPath
+  mkPath = Path.unsafeFromChunk
+
+  loop = do
+    resetErrno
+    ptr <- c_readdir dirp
+    if (ptr /= nullPtr)
+    then do
+        let dname = #{ptr struct dirent, d_name} ptr
+        dtype :: #{type unsigned char} <- #{peek struct dirent, d_type} ptr
+        -- dreclen :: #{type unsigned short} <- #{peek struct dirent, d_reclen} ptr
+        -- It is possible to find the name length using dreclen and then use
+        -- fromPtrN, but it is not straightforward because the reclen is
+        -- padded to 8-byte boundary.
+        name <- Array.fromCString (castPtr dname)
+        if (dtype == #const DT_DIR)
+        then do
+            isMeta <- isMetaDir dname
+            if isMeta
+            then loop
+            else return (Just (Left (mkPath name)))
+        else return (Just (Right (mkPath name)))
+    else do
+        errno <- getErrno
+        if (errno == eINTR)
+        then loop
+        else do
+            let (Errno n) = errno
+            if (n == 0)
+            -- then return (Left (mkPath (Array.fromList [46])))
+            then return Nothing
+            else throwErrno "readDirStreamEither"
+
+{-# ANN type ChunkStreamState Fuse #-}
+data ChunkStreamState =
+      ChunkStreamInit [PosixPath] [PosixPath] Int [PosixPath] Int
+    | ChunkStreamLoop
+        PosixPath -- current dir path
+        [PosixPath]  -- remaining dirs
+        (Ptr CDir) -- current dir
+        [PosixPath] -- dirs buffered
+        Int    -- dir count
+        [PosixPath] -- files buffered
+        Int -- file count
+
+-- XXX We can use a fold for collecting files and dirs.
+-- XXX We can write a two fold scan to buffer and yield whichever fills first
+-- like foldMany, it would be foldEither.
+{-# INLINE readEitherChunks #-}
+readEitherChunks :: MonadIO m => [PosixPath] -> Stream m (Either [PosixPath] [PosixPath])
+readEitherChunks alldirs =
+    Stream step (ChunkStreamInit alldirs [] 0 [] 0)
+
+    where
+
+    -- We want to keep the dir batching as low as possible for better
+    -- concurrency esp when the number of dirs is low.
+    dirMax = 4
+    fileMax = 1000
+
+    mkPath :: Array Word8 -> PosixPath
+    mkPath = Path.unsafeFromChunk
+
+    step _ (ChunkStreamInit (x:xs) dirs ndirs files nfiles) = do
+        DirStream dirp <- liftIO $ openDirStream x
+        return $ Skip (ChunkStreamLoop x xs dirp dirs ndirs files nfiles)
+
+    step _ (ChunkStreamInit [] [] _ [] _) =
+        return Stop
+
+    step _ (ChunkStreamInit [] [] _ files _) =
+        return $ Yield (Right files) (ChunkStreamInit [] [] 0 [] 0)
+
+    step _ (ChunkStreamInit [] dirs _ files _) =
+        return $ Yield (Left dirs) (ChunkStreamInit [] [] 0 files 0)
+
+    step _ st@(ChunkStreamLoop curdir xs dirp dirs ndirs files nfiles) = do
+        liftIO resetErrno
+        dentPtr <- liftIO $ c_readdir dirp
+        if (dentPtr /= nullPtr)
+        then do
+            let dname = #{ptr struct dirent, d_name} dentPtr
+            dtype :: #{type unsigned char} <-
+                liftIO $ #{peek struct dirent, d_type} dentPtr
+
+            name <- Array.fromCString (castPtr dname)
+            let path = Path.append curdir (mkPath name)
+
+            if (dtype == (#const DT_DIR))
+            then do
+                isMeta <- liftIO $ isMetaDir dname
+                if isMeta
+                then return $ Skip st
+                else let dirs1 = path : dirs
+                         ndirs1 = ndirs + 1
+                      in if ndirs1 >= dirMax
+                         then return $ Yield (Left dirs1)
+                            (ChunkStreamLoop curdir xs dirp [] 0 files nfiles)
+                         else return $ Skip
+                            (ChunkStreamLoop curdir xs dirp dirs1 ndirs1 files nfiles)
+            else let files1 = path : files
+                     nfiles1 = nfiles + 1
+                  in if nfiles1 >= fileMax
+                     then return $ Yield (Right files1)
+                        (ChunkStreamLoop curdir xs dirp dirs ndirs [] 0)
+                     else return $ Skip
+                        (ChunkStreamLoop curdir xs dirp dirs ndirs files1 nfiles1)
+        else do
+            errno <- liftIO getErrno
+            if (errno == eINTR)
+            then return $ Skip st
+            else do
+                let (Errno n) = errno
+                liftIO $ closeDirStream (DirStream dirp)
+                if (n == 0)
+                then return $ Skip (ChunkStreamInit xs dirs ndirs files nfiles)
+                else liftIO $ throwErrno "readEitherChunks"
+
+foreign import ccall unsafe "string.h memcpy" c_memcpy
+    :: Ptr Word8 -> Ptr Word8 -> CSize -> IO (Ptr Word8)
+
+foreign import ccall unsafe "string.h strlen" c_strlen
+    :: Ptr CChar -> IO CSize
+
+{-# ANN type ChunkStreamByteState Fuse #-}
+data ChunkStreamByteState =
+      ChunkStreamByteInit0
+    | ChunkStreamByteInit [PosixPath] [PosixPath] Int MutByteArray Int
+    | ChunkStreamByteLoop
+        PosixPath -- current dir path
+        [PosixPath]  -- remaining dirs
+        (Ptr CDir) -- current dir
+        [PosixPath] -- dirs buffered
+        Int    -- dir count
+        MutByteArray
+        Int
+    | ChunkStreamByteLoopPending
+        (Ptr CChar) -- pending item
+        PosixPath -- current dir path
+        [PosixPath]  -- remaining dirs
+        (Ptr CDir) -- current dir
+        MutByteArray
+        Int
+
+-- XXX Add follow-symlinks option.
+-- XXX Detect cycles.
+
+-- XXX We can also emit both files and directories together this will be
+-- especially useful when we are emitting chunks.
+--
+-- Since we are separating paths by newlines, it cannot support newlines in
+-- paths. Or we can return null separated paths as well. Provide a Mut array
+-- API to replace the nulls with newlines in-place.
+--
+-- We can pass a fold to make this modular, but if we are passing readdir
+-- managed memory then we will have to consume it immediately. Otherwise we can
+-- use getdents64 directly and use GHC managed memory instead.
+
+-- | Left is directories. Right is a buffer containing directories and files
+-- separated by newlines.
+{-# INLINE readEitherByteChunks #-}
+readEitherByteChunks :: MonadIO m =>
+    [PosixPath] -> Stream m (Either [PosixPath] (Array Word8))
+readEitherByteChunks alldirs =
+    Stream step (ChunkStreamByteInit0)
+
+    where
+
+    -- XXX A single worker may not have enough directories to list at once to
+    -- fill up a large buffer. We need to change the concurrency model such
+    -- that a worker should be able to pick up another dir from the queue
+    -- without emitting an output until the buffer fills.
+    --
+    -- XXX A worker can also pick up multiple work items in one go. However, we
+    -- also need to keep in mind that any kind of batching might have
+    -- pathological cases where concurrency may be reduced.
+    --
+    -- XXX Alternatively, we can distribute the dir stream over multiple
+    -- concurrent folds and return (monadic output) a stream of arrays created
+    -- from the output channel, then consume that stream by using a monad bind.
+    bufSize = 4000
+
+    mkPath :: Array Word8 -> PosixPath
+    mkPath = Path.unsafeFromChunk
+
+    copyToBuf dstArr pos dirPath name = do
+        nameLen <- fmap fromIntegral (liftIO $ c_strlen name)
+        let PosixPath (Array dirArr start end) = dirPath
+            dirLen = end - start
+            -- XXX We may need to decode and encode the path if the
+            -- output encoding differs from fs encoding.
+            --
+            -- Account for separator and newline bytes.
+            byteCount = dirLen + nameLen + 2
+        if pos + byteCount <= bufSize
+        then do
+            -- XXX append a path separator to a dir path
+            -- We know it is already pinned.
+            MutByteArray.unsafeAsPtr dstArr (\ptr -> liftIO $ do
+                MutByteArray.putSliceUnsafe dirArr start dstArr pos dirLen
+                let ptr1 = ptr `plusPtr` (pos + dirLen)
+                    separator = 47 :: Word8
+                poke ptr1 separator
+                let ptr2 = ptr1 `plusPtr` 1
+                _ <- c_memcpy ptr2 (castPtr name) (fromIntegral nameLen)
+                let ptr3 = ptr2 `plusPtr` nameLen
+                    newline = 10 :: Word8
+                poke ptr3 newline
+                )
+            return (Just (pos + byteCount))
+        else return Nothing
+
+    step _ ChunkStreamByteInit0 = do
+        mbarr <- liftIO $ MutByteArray.pinnedNew bufSize
+        return $ Skip (ChunkStreamByteInit alldirs [] 0 mbarr 0)
+
+    step _ (ChunkStreamByteInit (x:xs) dirs ndirs mbarr pos) = do
+        DirStream dirp <- liftIO $ openDirStream x
+        return $ Skip (ChunkStreamByteLoop x xs dirp dirs ndirs mbarr pos)
+
+    step _ (ChunkStreamByteInit [] [] _ _ pos) | pos == 0 =
+        return Stop
+
+    step _ (ChunkStreamByteInit [] [] _ mbarr pos) =
+        return $ Yield (Right (Array mbarr 0 pos)) (ChunkStreamByteInit [] [] 0 mbarr 0)
+
+    step _ (ChunkStreamByteInit [] dirs _ mbarr pos) =
+        return $ Yield (Left dirs) (ChunkStreamByteInit [] [] 0 mbarr pos)
+
+    step _ (ChunkStreamByteLoopPending pending curdir xs dirp mbarr pos) = do
+        mbarr1 <- liftIO $ MutByteArray.pinnedNew bufSize
+        r1 <- copyToBuf mbarr1 0 curdir pending
+        case r1 of
+            Just pos2 ->
+                return $ Yield (Right (Array mbarr 0 pos))
+                    -- When we come in this state we have emitted dirs
+                    (ChunkStreamByteLoop curdir xs dirp [] 0 mbarr1 pos2)
+            Nothing -> error "Dirname too big for bufSize"
+
+    step _ st@(ChunkStreamByteLoop curdir xs dirp dirs ndirs mbarr pos) = do
+        liftIO resetErrno
+        dentPtr <- liftIO $ c_readdir dirp
+        if (dentPtr /= nullPtr)
+        then do
+            let dname = #{ptr struct dirent, d_name} dentPtr
+            dtype :: #{type unsigned char} <-
+                liftIO $ #{peek struct dirent, d_type} dentPtr
+
+            -- XXX Skips come around the entire loop, does that impact perf
+            -- because it has a StreamK in the middle.
+            -- Keep the file check first as it is more likely
+            if (dtype /= (#const DT_DIR))
+            then do
+                    r <- copyToBuf mbarr pos curdir dname
+                    case r of
+                        Just pos1 ->
+                            return $ Skip
+                                (ChunkStreamByteLoop curdir xs dirp dirs ndirs mbarr pos1)
+                        Nothing -> do
+                            if ndirs > 0
+                            then
+                                return $ Yield (Left dirs)
+                                    (ChunkStreamByteLoopPending dname curdir xs dirp mbarr pos)
+                            else
+                                return $ Skip
+                                    (ChunkStreamByteLoopPending dname curdir xs dirp mbarr pos)
+            else do
+                isMeta <- liftIO $ isMetaDir dname
+                if isMeta
+                then return $ Skip st
+                else do
+                    name <- Array.fromCString (castPtr dname)
+                    let path = Path.append curdir (mkPath name)
+                        dirs1 = path : dirs
+                        ndirs1 = ndirs + 1
+                    r <- copyToBuf mbarr pos curdir dname
+                    case r of
+                        Just pos1 ->
+                            return $ Skip
+                                (ChunkStreamByteLoop curdir xs dirp dirs1 ndirs1 mbarr pos1)
+                        Nothing -> do
+                            -- We know dirs1 in not empty here
+                            return $ Yield (Left dirs1)
+                                (ChunkStreamByteLoopPending dname curdir xs dirp mbarr pos)
+        else do
+            errno <- liftIO getErrno
+            if (errno == eINTR)
+            then return $ Skip st
+            else do
+                let (Errno n) = errno
+                liftIO $ closeDirStream (DirStream dirp)
+                if (n == 0)
+                then return $ Skip (ChunkStreamByteInit xs dirs ndirs mbarr pos)
+                else liftIO $ throwErrno "readEitherByteChunks"
+#endif
diff --git a/core/src/Streamly/Internal/FileSystem/Windows/ReadDir.hsc b/core/src/Streamly/Internal/FileSystem/Windows/ReadDir.hsc
new file mode 100644
index 0000000000..d1f8672f53
--- /dev/null
+++ b/core/src/Streamly/Internal/FileSystem/Windows/ReadDir.hsc
@@ -0,0 +1,215 @@
+-- |
+-- Module      : Streamly.Internal.FileSystem.Windows.ReadDir
+-- Copyright   : (c) 2024 Composewell Technologies
+--
+-- License     : BSD3
+-- Maintainer  : streamly@composewell.com
+-- Portability : GHC
+
+module Streamly.Internal.FileSystem.Windows.ReadDir
+    (
+#if defined(mingw32_HOST_OS) || defined(__MINGW32__)
+      DirStream
+    , openDirStream
+    , closeDirStream
+    , readDirStreamEither
+#endif
+    )
+where
+
+#if defined(mingw32_HOST_OS) || defined(__MINGW32__)
+
+import Control.Exception (throwIO)
+import Control.Monad (void)
+import Control.Monad.IO.Class (MonadIO(..))
+import Data.Char (ord, isSpace)
+import Data.IORef (IORef, newIORef, readIORef, writeIORef)
+import Foreign.C (CInt(..), CWchar(..), Errno(..), errnoToIOError, peekCWString)
+import Numeric (showHex)
+import Streamly.Internal.Data.Array (Array(..))
+import Streamly.Internal.FileSystem.WindowsPath (WindowsPath(..))
+import System.IO.Error (ioeSetErrorString)
+
+import qualified Streamly.Internal.Data.Array as Array
+import qualified Streamly.Internal.FileSystem.WindowsPath as Path
+import qualified System.Win32 as Win32 (failWith)
+
+import Foreign hiding (void)
+
+#include <windows.h>
+
+-- Note on A vs W suffix in APIs.
+-- CreateFile vs. CreateFileW: CreateFile is a macro that expands to
+-- CreateFileA or CreateFileW depending on whether Unicode support (UNICODE and
+-- _UNICODE preprocessor macros) is enabled in your project. To ensure
+-- consistent Unicode support, explicitly use CreateFileW.
+
+------------------------------------------------------------------------------
+-- Types
+------------------------------------------------------------------------------
+
+type BOOL = Bool
+type DWORD = Word32
+
+type UINT_PTR = Word
+type ErrCode = DWORD
+type LPCTSTR = Ptr CWchar
+type WIN32_FIND_DATA = ()
+type HANDLE = Ptr ()
+
+------------------------------------------------------------------------------
+-- Windows C APIs
+------------------------------------------------------------------------------
+
+-- XXX Note for i386, stdcall is needed instead of ccall, see Win32
+-- package/windows_cconv.h. We support only x86_64 for now.
+foreign import ccall unsafe "windows.h FindFirstFileW"
+  c_FindFirstFileW :: LPCTSTR -> Ptr WIN32_FIND_DATA -> IO HANDLE
+
+foreign import ccall unsafe "windows.h FindNextFileW"
+  c_FindNextFileW :: HANDLE -> Ptr WIN32_FIND_DATA -> IO BOOL
+
+foreign import ccall unsafe "windows.h FindClose"
+  c_FindClose :: HANDLE -> IO BOOL
+
+foreign import ccall unsafe "windows.h GetLastError"
+  getLastError :: IO ErrCode
+
+foreign import ccall unsafe "windows.h LocalFree"
+  localFree :: Ptr a -> IO (Ptr a)
+
+------------------------------------------------------------------------------
+-- Haskell C APIs
+------------------------------------------------------------------------------
+
+foreign import ccall unsafe "maperrno_func" -- in base/cbits/Win32Utils.c
+  c_maperrno_func :: ErrCode -> IO Errno
+
+------------------------------------------------------------------------------
+-- Error Handling
+------------------------------------------------------------------------------
+
+-- XXX getErrorMessage and castUINTPtrToPtr require c code, so left out for
+-- now. Once we replace these we can remove dependency on Win32. We can
+-- possibly implement these in Haskell by directly calling the Windows API.
+
+foreign import ccall unsafe "getErrorMessage"
+  getErrorMessage :: DWORD -> IO (Ptr CWchar)
+
+foreign import ccall unsafe "castUINTPtrToPtr"
+  castUINTPtrToPtr :: UINT_PTR -> Ptr a
+
+failWith :: String -> ErrCode -> IO a
+failWith fn_name err_code = do
+  c_msg <- getErrorMessage err_code
+  msg <- if c_msg == nullPtr
+         then return $ "Error 0x" ++ Numeric.showHex err_code ""
+         else do 
+             msg <- peekCWString c_msg
+             -- We ignore failure of freeing c_msg, given we're already failing
+             _ <- localFree c_msg
+             return msg
+  errno <- c_maperrno_func err_code
+  let msg' = reverse $ dropWhile isSpace $ reverse msg -- drop trailing \n
+      ioerror = errnoToIOError fn_name errno Nothing Nothing
+                  `ioeSetErrorString` msg'
+  throwIO ioerror
+
+errorWin :: String -> IO a
+errorWin fn_name = do
+  err_code <- getLastError
+  failWith fn_name err_code
+
+failIf :: (a -> Bool) -> String -> IO a -> IO a
+failIf p wh act = do
+  v <- act
+  if p v then errorWin wh else return v
+
+iNVALID_HANDLE_VALUE :: HANDLE
+iNVALID_HANDLE_VALUE = castUINTPtrToPtr maxBound
+
+------------------------------------------------------------------------------
+-- Dir stream implementation
+------------------------------------------------------------------------------
+
+-- XXX Define this as data and unpack three fields?
+newtype DirStream =
+    DirStream (HANDLE, IORef Bool, ForeignPtr WIN32_FIND_DATA)
+
+openDirStream :: WindowsPath -> IO DirStream
+openDirStream p = do
+    let path = Path.unsafeAppend p $ Path.unsafeFromString "*"
+    fp_finddata <- mallocForeignPtrBytes (# const sizeof(WIN32_FIND_DATAW) )
+    withForeignPtr fp_finddata $ \dataPtr -> do
+        handle <-
+            Array.asCStringUnsafe (Path.toChunk path) $ \pathPtr -> do
+                -- XXX Use getLastError to distinguish the case when no
+                -- matching file is found. See the doc of FindFirstFileW.
+                failIf 
+                    (== iNVALID_HANDLE_VALUE) 
+                    ("FindFirstFileW: " ++ Path.toString path)
+                    $ c_FindFirstFileW (castPtr pathPtr) dataPtr
+        ref <- newIORef True
+        return $ DirStream (handle, ref, fp_finddata)
+
+closeDirStream :: DirStream -> IO ()
+closeDirStream (DirStream (h, _, _)) = void (c_FindClose h)
+
+-- XXX Keep this in sync with the isMetaDir function in Posix readdir module.
+isMetaDir :: Ptr CWchar -> IO Bool
+isMetaDir dname = do
+    -- XXX Assuming UTF16LE encoding
+    c1 <- peek dname
+    if (c1 /= fromIntegral (ord '.'))
+    then return False
+    else do
+        c2 :: Word8 <- peekByteOff dname 1
+        if (c2 == 0)
+        then return True
+        else if (c2 /= fromIntegral (ord '.'))
+        then return False
+        else do
+            c3 :: Word8 <- peekByteOff dname 2
+            if (c3 == 0)
+            then return True
+            else return False
+
+readDirStreamEither :: DirStream -> IO (Maybe (Either WindowsPath WindowsPath))
+readDirStreamEither (DirStream (h, ref, fdata)) =
+    withForeignPtr fdata $ \ptr -> do
+        firstTime <- readIORef ref
+        if firstTime
+        then do
+            writeIORef ref False
+            processEntry ptr
+        else findNext ptr
+
+    where
+
+    mkPath :: Array Word8 -> WindowsPath
+    mkPath = Path.unsafeFromChunk
+
+    processEntry ptr = do
+        let dname = #{ptr WIN32_FIND_DATAW, cFileName} ptr
+        dattrs :: #{type DWORD} <-
+            #{peek WIN32_FIND_DATAW, dwFileAttributes} ptr
+        name <- Array.fromW16CString dname
+        if (dattrs .&. (#const FILE_ATTRIBUTE_DIRECTORY) /= 0)
+        then do
+            isMeta <- isMetaDir dname
+            if isMeta
+            then findNext ptr
+            else return (Just (Left (mkPath (Array.castUnsafe name))))
+        else return (Just (Right (mkPath (Array.castUnsafe name))))
+
+    findNext ptr = do
+        retval <- liftIO $ c_FindNextFileW h ptr
+        if (retval)
+        then processEntry ptr
+        else do
+            err <- getLastError
+            if err == (# const ERROR_NO_MORE_FILES )
+            then return Nothing
+            -- XXX Print the path in the error message
+            else Win32.failWith "findNextFile" err
+#endif
diff --git a/core/streamly-core.cabal b/core/streamly-core.cabal
index d38b2adc3e..29614ba785 100644
--- a/core/streamly-core.cabal
+++ b/core/streamly-core.cabal
@@ -298,6 +298,8 @@ library
         , src/Streamly/Internal/Data/Array
         , src/Streamly/Internal/Data/Stream
 
+    -- Prefer OS conditionals inside the source files rather than here,
+    -- conditionals here do not work well with cabal2nix.
     if os(windows)
       c-sources:     src/Streamly/Internal/Data/Time/Clock/Windows.c
 
@@ -351,13 +353,11 @@ library
                      -- streamly-core-streams
                      , Streamly.Internal.Data.StreamK
                      -- StreamD depends on streamly-array-types
-                     , Streamly.Internal.Data.Stream.StreamD
                      , Streamly.Internal.Data.Stream
 
                      -- streamly-core-data
                      , Streamly.Internal.Data.Builder
                      , Streamly.Internal.Data.Unfold
-                     , Streamly.Internal.Data.Fold.Chunked
                      , Streamly.Internal.Data.Parser
                      , Streamly.Internal.Data.ParserK
                      , Streamly.Internal.Data.Pipe
@@ -369,8 +369,6 @@ library
                      -- streamly-core-data-arrays
                      , Streamly.Internal.Data.Array.Generic
                      , Streamly.Internal.Data.Array
-                     , Streamly.Internal.Data.MutArray.Stream
-                     , Streamly.Internal.Data.Array.Stream
 
                     -- streamly-unicode-core
                      , Streamly.Internal.Unicode.Stream
@@ -397,7 +395,9 @@ library
 
                      , Streamly.Internal.FileSystem.Handle
                      , Streamly.Internal.FileSystem.File
-                     , Streamly.Internal.FileSystem.Dir
+                     , Streamly.Internal.FileSystem.DirIO
+                     , Streamly.Internal.FileSystem.Posix.ReadDir
+                     , Streamly.Internal.FileSystem.Windows.ReadDir
 
                     -- Ring Arrays
                      , Streamly.Internal.Data.Ring
@@ -411,18 +411,15 @@ library
                      -- , Streamly.Data.Binary.Encode -- Stream types
 
                      -- Pre-release modules
-                     -- , Streamly.Data.Fold.Window
                      -- , Streamly.Data.Pipe
                      -- , Streamly.Data.Array.Stream
                      -- , Streamly.Data.Array.Fold
                      -- , Streamly.Data.Array.Mut.Stream
                      -- , Streamly.Data.Ring
-                     -- , Streamly.Data.Ring.Unboxed
+                     -- , Streamly.Data.Ring.Generic
                      -- , Streamly.Data.IORef.Unboxed
                      -- , Streamly.Data.List
                      -- , Streamly.Data.Binary.Decode
-                     -- , Streamly.FileSystem.File
-                     -- , Streamly.FileSystem.Dir
                      -- , Streamly.Data.Time.Units
                      -- , Streamly.Data.Time.Clock
                      -- , Streamly.Data.Tuple.Strict
@@ -443,12 +440,25 @@ library
                      , Streamly.Data.Stream
                      , Streamly.Data.StreamK
                      , Streamly.Data.Unfold
-                     , Streamly.FileSystem.Dir
+                     , Streamly.FileSystem.DirIO
                      , Streamly.FileSystem.File
                      , Streamly.FileSystem.Handle
                      , Streamly.Unicode.Parser
                      , Streamly.Unicode.Stream
                      , Streamly.Unicode.String
+
+                    -- Deprecated in 0.3.0
+                     , Streamly.Internal.FileSystem.Dir
+                     , Streamly.FileSystem.Dir
+
+                    -- Deprecated in 0.2.0
+                     , Streamly.Internal.Data.MutArray.Stream
+                     , Streamly.Internal.Data.Array.Stream
+                     , Streamly.Internal.Data.Stream.StreamD
+                     , Streamly.Internal.Data.Fold.Chunked
+
+    -- Only those modules should be here which are fully re-exported via some
+    -- other module.
     other-modules:
                       Streamly.Internal.Data.Fold.Step
                     , Streamly.Internal.Data.Fold.Type
diff --git a/src/Streamly/Internal/FileSystem/Event/Linux.hs b/src/Streamly/Internal/FileSystem/Event/Linux.hs
index d2cc613870..beed036072 100644
--- a/src/Streamly/Internal/FileSystem/Event/Linux.hs
+++ b/src/Streamly/Internal/FileSystem/Event/Linux.hs
@@ -178,6 +178,7 @@ import System.IO (Handle, hClose, IOMode(ReadMode))
 import GHC.IO.Handle.FD (handleToFd)
 
 import Streamly.Internal.Data.Array (Array(..), byteLength)
+import Streamly.Internal.FileSystem.Path (Path)
 
 import qualified Data.IntMap.Lazy as Map
 import qualified Data.List.NonEmpty as NonEmpty
@@ -186,12 +187,13 @@ import qualified Streamly.Data.Array as A (fromList, writeN, getIndex)
 import qualified Streamly.Data.Stream as S
 import qualified Streamly.FileSystem.Handle as FH
 import qualified Streamly.Unicode.Stream as U
+import qualified Streamly.Internal.FileSystem.Path as Path
 
 import qualified Streamly.Internal.Data.Array as A
-    ( fromStream, asCStringUnsafe, unsafePinnedAsPtr
+    ( asCStringUnsafe, unsafePinnedAsPtr
     , getSliceUnsafe, read
     )
-import qualified Streamly.Internal.FileSystem.Dir as Dir (readDirs)
+import qualified Streamly.Internal.FileSystem.DirIO as Dir (readDirs)
 import qualified Streamly.Internal.Data.Parser as PR
     (takeEQ, fromEffect, fromFold)
 
@@ -615,8 +617,8 @@ foreign import ccall unsafe
 -- separated bytes. So these may fail or convert the path in an unexpected
 -- manner. We should ultimately remove all usage of these.
 
-toUtf8 :: MonadIO m => String -> m (Array Word8)
-toUtf8 = A.fromStream . U.encodeUtf8 . S.fromList
+toUtf8 :: MonadIO m => Path -> m (Array Word8)
+toUtf8 path = pure $ Path.toChunk path
 
 utf8ToString :: Array Word8 -> String
 utf8ToString = runIdentity . S.fold FL.toList . U.decodeUtf8' . A.read
@@ -716,12 +718,14 @@ addToWatch cfg@Config{..} watch0@(Watch handle wdMap) root0 path0 = do
     --
     -- XXX readDirs currently uses paths as String, we need to convert it
     -- to "/" separated by byte arrays.
-    pathIsDir <- doesDirectoryExist $ utf8ToString absPath
+    let p = Path.unsafeFromChunk absPath
+    -- XXX Need a FileSystem.Stat module to remove this
+    pathIsDir <- doesDirectoryExist (Path.toString p)
     when (watchRec && pathIsDir) $ do
         let f = addToWatch cfg watch0 root . appendPaths path
             in S.fold (FL.drainMapM f)
                 $ S.mapM toUtf8
-                $ Dir.readDirs $ utf8ToString absPath
+                $ Dir.readDirs p
 
 foreign import ccall unsafe
     "sys/inotify.h inotify_rm_watch" c_inotify_rm_watch
diff --git a/stack.yaml b/stack.yaml
index fea7ad4e21..cc14d5c457 100644
--- a/stack.yaml
+++ b/stack.yaml
@@ -1,4 +1,4 @@
-resolver: lts-20.13
+resolver: lts-22.0
 packages:
 - '.'
 - './benchmark'