diff --git a/src/Streamly/Internal/Data/Array/Foreign.hs b/src/Streamly/Internal/Data/Array/Foreign.hs
index e6e7f9746b..403fce33c3 100644
--- a/src/Streamly/Internal/Data/Array/Foreign.hs
+++ b/src/Streamly/Internal/Data/Array/Foreign.hs
@@ -33,6 +33,7 @@ module Streamly.Internal.Data.Array.Foreign
       Array
 
     -- * Construction
+    , A.nil
 
     -- Pure, From Static Memory (Unsafe)
     -- We can use fromPtrM#, fromCStringM# and fromAddrM# to create arrays from
diff --git a/src/Streamly/Internal/Data/Array/Foreign/Type.hs b/src/Streamly/Internal/Data/Array/Foreign/Type.hs
index 6e967f96f9..dfecbd72ca 100644
--- a/src/Streamly/Internal/Data/Array/Foreign/Type.hs
+++ b/src/Streamly/Internal/Data/Array/Foreign/Type.hs
@@ -21,6 +21,7 @@ module Streamly.Internal.Data.Array.Foreign.Type
     , unsafeThaw
 
     -- * Construction
+    , nil
     , splice
 
     , fromPtr
diff --git a/src/Streamly/Internal/Unicode/Utf8.hs b/src/Streamly/Internal/Unicode/Utf8.hs
index 420751ce71..9adc0f51f9 100644
--- a/src/Streamly/Internal/Unicode/Utf8.hs
+++ b/src/Streamly/Internal/Unicode/Utf8.hs
@@ -6,62 +6,160 @@
 -- Stability   : experimental
 -- Portability : GHC
 --
+-- This module mimics the API of the text package. Some documentation snippets
+-- may have been taken from the text package.
 module Streamly.Internal.Unicode.Utf8
     (
-    -- * Type
-      Utf8
 
-    -- * Creation and elimination
-    , pack
-    , unpack
-    , toArray
+      module Streamly.Internal.Unicode.Utf8.Type
+    , module Streamly.Internal.Unicode.Utf8.Transform
+    , module Streamly.Internal.Unicode.Utf8.Eliminate
+    , module Streamly.Internal.Unicode.Utf8.Generate
+    , module Streamly.Internal.Unicode.Utf8.Reduce
+
+    -- * Folds
+
+    -- ** Special folds
+    , concat
+    , concatMap
+
+    -- * Substrings
+
+    -- ** Breaking into lines and words
+    , lines
+    --, lines'
+    , words
+    , unlines
+    , unwords
+
+    -- * Zipping
+    , zip
+    , zipWith
+
+    -- -* Ordered
+    -- , sort
+
+    -- -- * Low level operations
+    -- , copy
+    -- , unpackCString#
+
     )
 where
 
+#include "inline.hs"
+
 --------------------------------------------------------------------------------
 -- Imports
 --------------------------------------------------------------------------------
 
-import Control.DeepSeq (NFData)
-import Data.Word (Word8)
-import Streamly.Internal.Data.Array.Foreign.Type (Array)
+import Data.Char (isSpace)
 import System.IO.Unsafe (unsafePerformIO)
 
-import qualified Streamly.Internal.Data.Array.Foreign as Array
+import qualified Data.List as List
 import qualified Streamly.Internal.Data.Stream.IsStream as Stream
-import qualified Streamly.Internal.Unicode.Stream as Unicode
+
+import Streamly.Internal.Unicode.Utf8.Type
+import Streamly.Internal.Unicode.Utf8.Transform
+import Streamly.Internal.Unicode.Utf8.Eliminate
+import Streamly.Internal.Unicode.Utf8.Generate
+import Streamly.Internal.Unicode.Utf8.Reduce
+
+import Prelude hiding
+    ( concat
+    , concatMap
+    , foldr
+    , lines
+    , null
+    , unlines
+    , unwords
+    , words
+    , zip
+    , zipWith
+    )
+
+-- $setup
+-- >>> :set -XOverloadedStrings
+-- >>> import qualified Streamly.Internal.Unicode.Utf8 as Utf8
 
 --------------------------------------------------------------------------------
--- Type
+-- Special folds
 --------------------------------------------------------------------------------
 
--- | A space efficient, packed, unboxed Unicode container.
-newtype Utf8 =
-    Utf8 (Array Word8)
-    deriving (NFData)
+-- XXX We should write these APIs generalized on Array a and then just use those
+-- for the Utf8 type. The generalized APIs would be more useful, they can go in
+-- the Array module itself and can be used generally for arrays, you won't need
+-- to transform arrays into stream and then back for such common operations.
+-- | Concatenate a list of 'Utf8's.
+--
+-- /Time complexity:/ O(n)
+{-# INLINE concat #-}
+concat :: [Utf8] -> Utf8
+concat ts =
+    case Prelude.filter (not . null) ts of
+        [] -> empty
+        [t] -> t
+        xs -> Prelude.foldl1 append xs
+
+-- | Map a function over a 'Utf8' that results in a 'Utf8', and
+-- concatenate the results.
+--
+-- /Time complexity:/ O(n)
+{-# INLINE concatMap #-}
+concatMap :: (Char -> Utf8) -> Utf8 -> Utf8
+concatMap f = concat . foldr ((:) . f) []
 
 --------------------------------------------------------------------------------
--- Functions
+-- Zipping
 --------------------------------------------------------------------------------
 
-{-# INLINE toArray #-}
-toArray :: Utf8 -> Array Word8
-toArray (Utf8 arr) = arr
+-- | 'zip' takes two 'Utf8's and returns a list of
+-- corresponding pairs of bytes. If one input 'Utf8' is short,
+-- excess elements of the longer 'Utf8' are discarded. This is
+-- equivalent to a pair of 'unpack' operations.
+--
+-- /Time complexity:/ O(n)
+{-# INLINE zip #-}
+zip :: Utf8 -> Utf8 -> [(Char,Char)]
+zip a b =
+    unsafePerformIO
+        $ Stream.toList $ Stream.zipWith (,) (toStream a) (toStream b)
 
+-- | 'zipWith' generalises 'zip' by zipping with the function
+-- given as the first argument, instead of a tupling function.
+-- Performs replacement on invalid scalar values.
+--
+-- /Time complexity:/ O(n)
+{-# INLINE zipWith #-}
+zipWith :: (Char -> Char -> Char) -> Utf8 -> Utf8 -> Utf8
+zipWith f a b = fromStream (Stream.zipWith f (toStream a) (toStream b))
 
-{-# INLINEABLE pack #-}
-pack :: String -> Utf8
-pack s =
-    Utf8
-        $ unsafePerformIO
-        $ Array.fromStreamN len $ Unicode.encodeUtf8' $ Stream.fromList s
+-- | Breaks a 'Utf8' up into a list of words, delimited by 'Char's
+-- representing white space.
+--
+-- /Time complexity:/ O(n)
+{-# INLINE words #-}
+words :: Utf8 -> [Utf8]
+words = split isSpace
 
-    where
+-- | Breaks a 'Utf8' up into a list of 'Utf8's at
+-- newline 'Char's. The resulting strings do not contain newlines.
+--
+-- /Time complexity:/ O(n)
+{-# INLINE lines #-}
+lines :: Utf8 -> [Utf8]
+lines = split (== '\n')
 
-    len = length s
+-- | Joins lines, after appending a terminating newline to
+-- each.
+--
+-- /Time complexity:/ O(n)
+{-# INLINE unlines #-}
+unlines :: [Utf8] -> Utf8
+unlines = concat . List.map (`snoc` '\n')
 
-{-# INLINEABLE unpack #-}
-unpack :: Utf8 -> String
-unpack u =
-    unsafePerformIO
-        $ Stream.toList $ Unicode.decodeUtf8' $ Array.toStream $ toArray u
+-- | Joins words using single space characters.
+--
+-- /Time complexity:/ O(n)
+{-# INLINE unwords #-}
+unwords :: [Utf8] -> Utf8
+unwords = intercalate (singleton ' ')
diff --git a/src/Streamly/Internal/Unicode/Utf8/Eliminate.hs b/src/Streamly/Internal/Unicode/Utf8/Eliminate.hs
new file mode 100644
index 0000000000..9f84c91707
--- /dev/null
+++ b/src/Streamly/Internal/Unicode/Utf8/Eliminate.hs
@@ -0,0 +1,368 @@
+-- |
+-- Module      : Streamly.Internal.Unicode.Utf8.Eliminate
+-- Copyright   : (c) 2021 Composewell Technologies
+-- License     : BSD-3-Clause
+-- Maintainer  : streamly@composewell.com
+-- Stability   : experimental
+-- Portability : GHC
+--
+module Streamly.Internal.Unicode.Utf8.Eliminate
+    (
+    -- * Folds
+      fold
+    , foldl
+    , foldl'
+    , foldl1
+    , foldl1'
+    , foldr
+    , foldr1
+
+    -- ** Special folds
+    , any
+    , all
+    , maximum
+    , minimum
+
+    -- * Predicates
+
+    -- XXX Comment our further stuff to help with the GHCJS compilation
+    -- , isPrefixOf
+    -- , isSuffixOf
+    -- XXX isInfixOf takes too much memory to compile
+    -- , isInfixOf
+
+    -- ** View patterns
+
+    -- XXX Comment our further stuff to help with the GHCJS compilation
+    -- , stripPrefix
+    -- , stripSuffix
+    , commonPrefixes
+
+    -- * Searching
+    , find
+    , elem
+
+    -- * Indexing
+    -- $index
+    , index
+    , findIndex
+    , countChar
+    , count
+    )
+where
+
+#include "inline.hs"
+
+--------------------------------------------------------------------------------
+-- Imports
+--------------------------------------------------------------------------------
+
+import Control.Monad.IO.Class (MonadIO(..))
+import Streamly.Internal.Data.Fold (Fold)
+import Streamly.Internal.Data.Stream.IsStream.Lift (hoist)
+import System.IO.Unsafe (unsafePerformIO)
+
+import qualified Streamly.Internal.Data.Array.Foreign.Type as Array
+import qualified Streamly.Internal.Data.Stream.IsStream as Stream
+
+import Streamly.Internal.Unicode.Utf8.Type
+
+import Prelude hiding
+    ( all
+    , any
+    , elem
+    , foldl
+    , foldl1
+    , foldr
+    , foldr1
+    , maximum
+    , minimum
+    )
+
+-- $setup
+-- >>> :set -XOverloadedStrings
+-- >>> import qualified Streamly.Internal.Unicode.Utf8 as Utf8
+
+--------------------------------------------------------------------------------
+-- Reducing Streams (folds)
+--------------------------------------------------------------------------------
+
+-- XXX Try removing MonadIO constraint here
+--
+-- See this: https://github.com/composewell/streamly/issues/1457
+-- We can probably work on remving the MonadIO constraint after.
+--
+{-# INLINE fold #-}
+fold :: MonadIO m => Fold m Char b -> Utf8 -> m b
+fold f = Stream.fold f . hoist liftIO . toStream
+
+-- | 'foldl', applied to a binary operator, a starting value
+-- (typically the left-identity of the operator), and a 'Utf8',
+--
+-- /Time complexity:/ O(n)
+-- reduces the 'Utf8' using the binary operator, from left to right.
+--
+-- /Unimplemented/
+{-# INLINE foldl #-}
+foldl :: (a -> Char -> a) -> a -> Utf8 -> a
+foldl = undefined
+
+-- XXX Try using Identity monad instead of IO
+--
+-- See this: https://github.com/composewell/streamly/issues/1457
+-- We can probably work on remving the MonadIO constraint after.
+--
+-- | A strict version of 'foldl'.
+--
+-- /Time complexity:/ O(n)
+{-# INLINE foldl' #-}
+foldl' :: (a -> Char -> a) -> a -> Utf8 -> a
+foldl' f z t = unsafePerformIO $ Stream.foldl' f z (toStream t)
+
+-- | A variant of 'foldl' that has no starting value argument. Returns
+-- 'Nothing' if applied to an empty 'Utf8'.
+--
+-- /Time complexity:/ O(n)
+--
+-- /Unimplemented/
+{-# INLINE foldl1 #-}
+foldl1 :: (Char -> Char -> Char) -> Utf8 -> Char
+foldl1 = undefined
+
+-- | A strict version of 'foldl1'.
+--
+-- /Time complexity:/ O(n)
+{-# INLINE foldl1' #-}
+foldl1' :: (Char -> Char -> Char) -> Utf8 -> Maybe Char
+foldl1' f t = unsafePerformIO $ Stream.foldl1' f (toStream t)
+
+-- | 'foldr', applied to a binary operator, a starting value
+-- (typically the right-identity of the operator), and a 'Utf8',
+-- reduces the 'Utf8' using the binary operator, from right to left.
+--
+-- /Time complexity:/ O(n)
+{-# INLINE foldr #-}
+foldr :: (Char -> a -> a) -> a -> Utf8 -> a
+foldr f z t = unsafePerformIO $ Stream.foldr f z (toStream t)
+
+-- | A variant of 'foldr' that has no starting value argument. Returns
+-- 'Nothing' if applied to an empty 'Utf8'.
+--
+-- /Time complexity:/ O(n)
+--
+-- /Unimplemented/
+{-# INLINE foldr1 #-}
+foldr1 :: (Char -> Char -> Char) -> Utf8 -> Maybe Char
+foldr1 = undefined
+
+--------------------------------------------------------------------------------
+-- Special folds
+--------------------------------------------------------------------------------
+
+-- | 'any' @p@ @t@ determines whether any character in the
+-- 'Utf8' @t@ satisfies the predicate @p@.
+--
+-- /Time complexity:/ O(n)
+{-# INLINE any #-}
+any :: (Char -> Bool) -> Utf8 -> Bool
+any p t = unsafePerformIO $ Stream.any p (toStream t)
+
+-- | 'all' @p@ @t@ determines whether all characters in the
+-- 'Utf8' @t@ satisfy the predicate @p@.
+--
+-- /Time complexity:/ O(n)
+{-# INLINE all #-}
+all :: (Char -> Bool) -> Utf8 -> Bool
+all p t = unsafePerformIO $ Stream.all p (toStream t)
+
+-- | 'maximum' returns the maximum value from a 'Utf8', or 'Nothing' if
+-- empty.
+--
+-- /Time complexity:/ O(n)
+{-# INLINE maximum #-}
+maximum :: Utf8 -> Maybe Char
+maximum t = unsafePerformIO $ Stream.maximum (toStream t)
+
+-- | 'minimum' returns the minimum value from a 'Utf8', or 'Nothing' if
+-- empty.
+--
+-- /Time complexity:/ O(n)
+{-# INLINE minimum #-}
+minimum :: Utf8 -> Maybe Char
+minimum t = unsafePerformIO $ Stream.minimum (toStream t)
+
+--------------------------------------------------------------------------------
+-- Indexing 'Utf8's
+--------------------------------------------------------------------------------
+
+-- | 'Utf8' index (subscript) operator, starting from 0.
+--
+-- /Time complexity:/ O(n)
+{-# INLINE index #-}
+index :: Utf8 -> Int -> Maybe Char
+index t n = unsafePerformIO $ (Stream.!!) (toStream t) n
+
+-- | The 'findIndex' function takes a predicate and a 'Utf8'
+-- and returns the index of the first element in the 'Utf8' satisfying
+-- the predicate.
+--
+-- /Time complexity:/ O(n)
+{-# INLINE findIndex #-}
+findIndex :: (Char -> Bool) -> Utf8 -> Maybe Int
+findIndex p t = unsafePerformIO $ Stream.findIndex p (toStream t)
+
+-- | The 'count' function returns the number of times the
+-- query string appears in the given 'Utf8'. An empty query string is
+-- invalid, and will cause an error to be raised.
+--
+-- /Time complexity:/ O(n*m)
+--
+-- /Unimplemented/
+{-# INLINE_NORMAL count #-}
+count :: Utf8 -> Utf8 -> Int
+count = undefined
+
+-- | The 'countChar' function returns the number of times the
+-- query element appears in the given 'Utf8'.
+--
+-- /Time complexity:/ O(n)
+{-# INLINE countChar #-}
+countChar :: Char -> Utf8 -> Int
+countChar c t =
+    unsafePerformIO $ Stream.length $ Stream.filter (== c) (toStream t)
+
+--------------------------------------------------------------------------------
+-- Searching
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+-- Searching with a predicate
+--------------------------------------------------------------------------------
+
+-- | The 'elem' function takes a character and a 'Utf8', and
+-- returns 'True' if the element is found in the given 'Utf8', or
+-- 'False' otherwise.
+--
+-- /Time complexity:/ O(n)
+{-# INLINE elem #-}
+elem :: Char -> Utf8 -> Bool
+elem c = any (== c)
+
+-- | The 'find' function takes a predicate and a 'Utf8', and
+-- returns the first element matching the predicate, or 'Nothing' if
+-- there is no such element.
+--
+-- /Time complexity:/ O(n)
+{-# INLINE find #-}
+find :: (Char -> Bool) -> Utf8 -> Maybe Char
+find p t = unsafePerformIO $ Stream.find p (toStream t)
+
+--------------------------------------------------------------------------------
+-- Predicates
+--------------------------------------------------------------------------------
+
+-- XXX This function isn't exported as it takes too much time to compile.
+-- Need to investigate.
+-- | The 'isPrefixOf' function takes two 'Utf8's and returns
+-- 'True' iff the first is a prefix of the second.
+--
+-- /Time complexity:/ O(n)
+{-# INLINE_NORMAL _isPrefixOf #-}
+_isPrefixOf :: Utf8 -> Utf8 -> Bool
+_isPrefixOf a b =
+    Array.byteLength (toArray a) <= Array.byteLength (toArray b)
+        && unsafePerformIO (Stream.isPrefixOf (toStream a) (toStream b))
+
+-- XXX This function isn't exported as it takes too much time to compile.
+-- Need to investigate.
+-- | The 'isSuffixOf' function takes two 'Utf8's and returns
+-- 'True' iff the first is a suffix of the second.
+--
+-- /Time complexity:/ O(n)
+{-# INLINE _isSuffixOf #-}
+_isSuffixOf :: Utf8 -> Utf8 -> Bool
+_isSuffixOf a b = unsafePerformIO (Stream.isSuffixOf (toStream a) (toStream b))
+
+-- XXX This specific API uses a lot of memory to compile
+-- XXX Use domain specific knowledge to implement it efficiently!
+-- | The 'isInfixOf' function takes two 'Utf8's and returns
+-- 'True' iff the first is contained, wholly and intact, anywhere
+-- within the second.
+--
+-- In (unlikely) bad cases, this function's time complexity degrades
+-- towards /O(n*m)/.
+--
+-- /Time complexity:/ O(n+m)
+{-# INLINE_NORMAL _isInfixOf #-}
+_isInfixOf :: Utf8 -> Utf8 -> Bool
+_isInfixOf a b = unsafePerformIO (Stream.isInfixOf (toStream a) (toStream b))
+
+--------------------------------------------------------------------------------
+-- View patterns
+--------------------------------------------------------------------------------
+
+-- XXX Change >> to >>> once exposed
+-- | Return the suffix of the second string if its prefix
+-- matches the entire first string.
+--
+-- Examples:
+--
+-- >> stripPrefix "foo" "foobar"
+-- Just "bar"
+--
+-- >> stripPrefix ""    "baz"
+-- Just "baz"
+--
+-- >> stripPrefix "foo" "quux"
+-- Nothing
+--
+-- /Time complexity:/ O(n)
+_stripPrefix :: Utf8 -> Utf8 -> Maybe Utf8
+_stripPrefix p t =
+    fmap fromStream $ unsafePerformIO $ Stream.stripPrefix (toStream p) (toStream t)
+
+-- XXX Change >> to >>> after implementation
+-- | Find the longest non-empty common prefix of two strings
+-- and return it, along with the suffixes of each string at which they
+-- no longer match.
+--
+-- If the strings do not have a common prefix or either one is empty,
+-- this function returns 'Nothing'.
+--
+-- Examples:
+--
+-- >> commonPrefixes "foobar" "fooquux"
+-- Just ("foo","bar","quux")
+--
+-- >> commonPrefixes "veeble" "fetzer"
+-- Nothing
+--
+-- >> commonPrefixes "" "baz"
+-- Nothing
+--
+-- /Time complexity:/ O(n)
+--
+-- /Unimplemented/
+commonPrefixes :: Utf8 -> Utf8 -> Maybe (Utf8, Utf8, Utf8)
+commonPrefixes = undefined
+
+-- XXX Change >> to >>> once exposed
+-- | Return the prefix of the second string if its suffix
+-- matches the entire first string.
+--
+-- Examples:
+--
+-- >> stripSuffix "bar" "foobar"
+-- Just "foo"
+--
+-- >> stripSuffix ""    "baz"
+-- Just "baz"
+--
+-- >> stripSuffix "foo" "quux"
+-- Nothing
+--
+-- /Time complexity:/ O(n)
+_stripSuffix :: Utf8 -> Utf8 -> Maybe Utf8
+_stripSuffix p t =
+    fmap fromStream
+        $ unsafePerformIO $ Stream.stripSuffix (toStream p) (toStream t)
diff --git a/src/Streamly/Internal/Unicode/Utf8/Generate.hs b/src/Streamly/Internal/Unicode/Utf8/Generate.hs
new file mode 100644
index 0000000000..5b124cb8d3
--- /dev/null
+++ b/src/Streamly/Internal/Unicode/Utf8/Generate.hs
@@ -0,0 +1,114 @@
+-- |
+-- Module      : Streamly.Internal.Unicode.Utf8.Generate
+-- Copyright   : (c) 2021 Composewell Technologies
+-- License     : BSD-3-Clause
+-- Maintainer  : streamly@composewell.com
+-- Stability   : experimental
+-- Portability : GHC
+--
+module Streamly.Internal.Unicode.Utf8.Generate
+    (
+    -- ** Accumulating maps
+      mapAccumL
+    , mapAccumR
+
+    -- ** Generation and unfolding
+    , replicateChar
+    , replicate
+    , unfoldr
+    , unfoldrN
+    )
+where
+
+#include "inline.hs"
+
+--------------------------------------------------------------------------------
+-- Imports
+--------------------------------------------------------------------------------
+
+import qualified Streamly.Internal.Data.Stream.IsStream as Stream
+
+import Streamly.Internal.Unicode.Utf8.Type
+
+import Prelude hiding (replicate)
+
+-- $setup
+-- >>> :set -XOverloadedStrings
+-- >>> import qualified Streamly.Internal.Unicode.Utf8 as Utf8
+
+--------------------------------------------------------------------------------
+-- Building 'Utf8's
+--------------------------------------------------------------------------------
+
+-- | Like a combination of 'map' and 'foldl''. Applies a
+-- function to each element of a 'Utf8', passing an accumulating
+-- parameter from left to right, and returns a final 'Utf8'.  Performs
+-- replacement on invalid scalar values.
+--
+-- /Time complexity:/ O(n)
+--
+-- /Unimplemented/
+{-# INLINE mapAccumL #-}
+mapAccumL :: (a -> Char -> (a,Char)) -> a -> Utf8 -> (a, Utf8)
+mapAccumL = undefined
+
+-- | The 'mapAccumR' function behaves like a combination of 'map' and
+-- a strict 'foldr'; it applies a function to each element of a
+-- 'Utf8', passing an accumulating parameter from right to left, and
+-- returning a final value of this accumulator together with the new
+-- 'Utf8'.
+-- Performs replacement on invalid scalar values.
+--
+-- /Unimplemented/
+{-# INLINE mapAccumR #-}
+mapAccumR :: (a -> Char -> (a,Char)) -> a -> Utf8 -> (a, Utf8)
+mapAccumR = undefined
+
+--------------------------------------------------------------------------------
+-- Generating and unfolding 'Utf8's
+--------------------------------------------------------------------------------
+
+-- | 'replicate' @n@ @t@ is a 'Utf8' consisting of the input
+-- @t@ repeated @n@ times.
+--
+-- /Time complexity:/ O(n*m)
+--
+-- /Unimplemented/
+{-# INLINE replicate #-}
+replicate :: Int -> Utf8 -> Utf8
+replicate = undefined
+
+-- | 'replicateChar' @n@ @c@ is a 'Utf8' of length @n@ with @c@ the
+-- value of every element.
+--
+-- /Time complexity:/ O(n)
+{-# INLINE replicateChar #-}
+replicateChar :: Int -> Char -> Utf8
+replicateChar n c = fromStream (Stream.replicate n c)
+
+-- | The 'unfoldr'
+-- function is analogous to the List 'L.unfoldr'. 'unfoldr' builds a
+-- 'Utf8' from a seed value. The function takes the element and
+-- returns 'Nothing' if it is done producing the 'Utf8', otherwise
+-- 'Just' @(a,b)@.  In this case, @a@ is the next 'Char' in the
+-- string, and @b@ is the seed value for further production.
+-- Performs replacemsent on invalid scalar values.
+--
+-- /Time complexity:/ O(n), where @n@ is the length of the result.
+{-# INLINE unfoldr #-}
+unfoldr :: (a -> Maybe (Char, a)) -> a -> Utf8
+unfoldr f s = fromStream (Stream.unfoldr f s)
+
+-- | Like 'unfoldr', 'unfoldrN' builds a 'Utf8' from a seed
+-- value. However, the length of the result should be limited by the
+-- first argument to 'unfoldrN'. This function is more efficient than
+-- 'unfoldr' when the maximum length of the result is known and
+-- correct, otherwise its performance is similar to 'unfoldr'.
+-- Performs replacement on invalid scalar values.
+--
+-- /Time complexity:/ O(n)
+--
+-- /Unimplemented/
+{-# INLINE unfoldrN #-}
+unfoldrN :: Int -> (a -> Maybe (Char, a)) -> a -> Utf8
+unfoldrN = undefined
diff --git a/src/Streamly/Internal/Unicode/Utf8/Reduce.hs b/src/Streamly/Internal/Unicode/Utf8/Reduce.hs
new file mode 100644
index 0000000000..0310f95629
--- /dev/null
+++ b/src/Streamly/Internal/Unicode/Utf8/Reduce.hs
@@ -0,0 +1,285 @@
+-- |
+-- Module      : Streamly.Internal.Unicode.Utf8.Reduce
+-- Copyright   : (c) 2021 Composewell Technologies
+-- License     : BSD-3-Clause
+-- Maintainer  : streamly@composewell.com
+-- Stability   : experimental
+-- Portability : GHC
+--
+module Streamly.Internal.Unicode.Utf8.Reduce
+    (
+    -- * Substrings
+
+    -- ** Breaking strings
+      splitAt
+    , breakOn
+    , breakOnEnd
+    , break
+    , span
+    , group
+    , groupBy
+    , inits
+    , tails
+
+    -- ** Breaking into many substrings
+    -- $split
+    , splitOn
+    , split
+    , chunksOf
+
+    -- * Searching
+    , breakOnAll
+    , partition
+    )
+where
+
+#include "inline.hs"
+
+--------------------------------------------------------------------------------
+-- Imports
+--------------------------------------------------------------------------------
+
+import System.IO.Unsafe (unsafePerformIO)
+
+import qualified Streamly.Internal.Data.Fold as Fold
+import qualified Streamly.Internal.Data.Stream.IsStream as Stream
+
+import Streamly.Internal.Unicode.Utf8.Type
+
+import Prelude hiding (break, span, splitAt)
+
+-- $setup
+-- >>> :set -XOverloadedStrings
+-- >>> import qualified Streamly.Internal.Unicode.Utf8 as Utf8
+
+--------------------------------------------------------------------------------
+-- Substrings
+--------------------------------------------------------------------------------
+
+-- | 'splitAt' @n t@ returns a pair whose first element is a
+-- prefix of @t@ of length @n@, and whose second is the remainder of
+-- the string. It is equivalent to @('take' n t, 'drop' n t)@.
+--
+-- /Time complexity:/ O(n)
+--
+-- /Unimplemented/
+splitAt :: Int -> Utf8 -> (Utf8, Utf8)
+splitAt = undefined
+
+-- | 'span', applied to a predicate @p@ and text @t@, returns
+-- a pair whose first element is the longest prefix (possibly empty)
+-- of @t@ of elements that satisfy @p@, and whose second is the
+-- remainder of the list.
+--
+-- >> Utf8.span (=='0') "000AB"
+-- ("000","AB")
+--
+-- /Time complexity:/ O(n)
+--
+-- /Unimplemented/
+{-# INLINE span #-}
+span :: (Char -> Bool) -> Utf8 -> (Utf8, Utf8)
+span = undefined
+
+-- | 'break' is like 'span', but the prefix returned is
+-- over elements that fail the predicate @p@.
+--
+-- >> Utf8.break (=='c') "180cm"
+-- ("180","cm")
+--
+-- /Time complexity:/ O(n)
+{-# INLINE break #-}
+break :: (Char -> Bool) -> Utf8 -> (Utf8, Utf8)
+break p = span (not . p)
+
+-- | Group characters in a string according to a predicate.
+--
+-- /Time complexity:/ O(n)
+groupBy :: (Char -> Char -> Bool) -> Utf8 -> [Utf8]
+groupBy p = unsafePerformIO . Stream.toList . Stream.groupsBy p write . toStream
+
+-- | Group characters in a string by equality.
+--
+-- /Time complexity:/ O(n)
+group :: Utf8 -> [Utf8]
+group = groupBy (==)
+
+-- | Return all initial segments of the given 'Utf8', shortest
+-- first.
+--
+-- /Time complexity:/ O(n)
+--
+-- /Unimplemented/
+inits :: Utf8 -> [Utf8]
+inits = undefined
+
+-- | Return all final segments of the given 'Utf8', longest
+-- first.
+--
+-- /Time complexity:/ O(n)
+--
+-- /Unimplemented/
+tails :: Utf8 -> [Utf8]
+tails = undefined
+
+-- XXX Add time complexity after implementation
+-- | Break a 'Utf8' into pieces separated by the first 'Utf8'
+-- argument (which cannot be empty), consuming the delimiter. An empty
+-- delimiter is invalid, and will cause an error to be raised.
+--
+-- Examples:
+--
+-- >> splitOn "\r\n" "a\r\nb\r\nd\r\ne"
+-- ["a","b","d","e"]
+--
+-- >> splitOn "aaa"  "aaaXaaaXaaaXaaa"
+-- ["","X","X","X",""]
+--
+-- >> splitOn "x"    "x"
+-- ["",""]
+--
+-- and
+--
+-- > intercalate s . splitOn s         == id
+-- > splitOn (singleton c)             == split (==c)
+--
+-- (Note: the string @s@ to split on above cannot be empty.)
+--
+-- /Unimplemented/
+{-# INLINE_NORMAL splitOn #-}
+splitOn :: Utf8
+        -- ^ String to split on. If this string is empty, an error
+        -- will occur.
+        -> Utf8
+        -- ^ Input text.
+        -> [Utf8]
+splitOn = undefined
+{-
+splitOn pat src =
+    unsafePerformIO
+        $ Stream.toList $ Stream.splitOnSeq (toArray pat) write (stream src)
+-}
+
+-- | Splits a 'Utf8' into components delimited by separators,
+-- where the predicate returns True for a separator element.  The
+-- resulting components do not contain the separators.  Two adjacent
+-- separators result in an empty component in the output.  eg.
+--
+-- >>> split (=='a') "aabbaca"
+-- ["","","bb","c",""]
+--
+-- >>> split (=='a') ""
+-- [""]
+--
+-- /Time complexity:/ O(n)
+{-# INLINE split #-}
+split :: (Char -> Bool) -> Utf8 -> [Utf8]
+split p t =
+    unsafePerformIO $ Stream.toList $ Stream.splitOn p write (toStream t)
+
+-- | Splits a 'Utf8' into components of length @k@.  The last
+-- element may be shorter than the other chunks, depending on the
+-- length of the input. Examples:
+--
+-- >>> chunksOf 3 "foobarbaz"
+-- ["foo","bar","baz"]
+--
+-- >>> chunksOf 4 "haskell.org"
+-- ["hask","ell.","org"]
+--
+-- /Time complexity:/ O(n)
+{-# INLINE chunksOf #-}
+chunksOf :: Int -> Utf8 -> [Utf8]
+chunksOf k t =
+    unsafePerformIO $ Stream.toList $ Stream.chunksOf k write (toStream t)
+
+-- XXX Add time complexity after implementation
+-- | Find the first instance of @needle@ (which must be
+-- non-'null') in @haystack@.  The first element of the returned tuple
+-- is the prefix of @haystack@ before @needle@ is matched.  The second
+-- is the remainder of @haystack@, starting with the match.
+--
+-- Examples:
+--
+-- >> breakOn "::" "a::b::c"
+-- ("a","::b::c")
+--
+-- >> breakOn "/" "foobar"
+-- ("foobar","")
+--
+-- Laws:
+--
+-- > append prefix match == haystack
+-- >   where (prefix, match) = breakOn needle haystack
+--
+-- If you need to break a string by a substring repeatedly (e.g. you
+-- want to break on every instance of a substring), use 'breakOnAll'
+-- instead, as it has lower startup overhead.
+--
+-- /Unimplemented/
+{-# INLINE breakOn #-}
+breakOn :: Utf8 -> Utf8 -> (Utf8, Utf8)
+breakOn = undefined
+
+-- XXX Change >> to >>> after implementation
+-- | Similar to 'breakOn', but searches from the end of the
+-- string.
+--
+-- The first element of the returned tuple is the prefix of @haystack@
+-- up to and including the last match of @needle@.  The second is the
+-- remainder of @haystack@, following the match.
+--
+-- >> breakOnEnd "::" "a::b::c"
+-- ("a::b::","c")
+--
+-- /Time complexity:/ O(n+m)
+--
+-- /Unimplemented/
+{-# INLINE breakOnEnd #-}
+breakOnEnd :: Utf8 -> Utf8 -> (Utf8, Utf8)
+breakOnEnd = undefined
+
+--------------------------------------------------------------------------------
+-- Searching
+--------------------------------------------------------------------------------
+
+-- | The 'partition' function takes a predicate and a 'Utf8',
+-- and returns the pair of 'Utf8's with elements which do and do not
+-- satisfy the predicate, respectively; i.e.
+--
+-- > partition p t == (filter p t, filter (not . p) t)
+--
+-- /Time complexity:/ O(n)
+{-# INLINE partition #-}
+partition :: (Char -> Bool) -> Utf8 -> (Utf8, Utf8)
+partition p = unsafePerformIO . Stream.fold partitionFold . toStream
+
+    where
+
+    partitionFold = Fold.tee (Fold.filter p write) (Fold.filter (not . p) write)
+
+-- XXX Add time complexity after implementation
+-- XXX Change >> to >>> after implementation
+-- | Find all non-overlapping instances of @needle@ in
+-- @haystack@.  Each element of the returned list consists of a pair:
+--
+-- * The entire string prior to the /k/th match (i.e. the prefix)
+--
+-- * The /k/th match, followed by the remainder of the string
+--
+-- Examples:
+--
+-- >> breakOnAll "::" ""
+-- []
+--
+-- >> breakOnAll "/" "a/b/c/"
+-- [("a","/b/c/"),("a/b","/c/"),("a/b/c","/")]
+--
+-- The @needle@ parameter may not be empty.
+--
+-- /Unimplemented/
+{-# INLINE breakOnAll #-}
+breakOnAll :: Utf8              -- ^ @needle@ to search for
+           -> Utf8              -- ^ @haystack@ in which to search
+           -> [(Utf8, Utf8)]
+breakOnAll = undefined
diff --git a/src/Streamly/Internal/Unicode/Utf8/Transform.hs b/src/Streamly/Internal/Unicode/Utf8/Transform.hs
new file mode 100644
index 0000000000..dd192297e5
--- /dev/null
+++ b/src/Streamly/Internal/Unicode/Utf8/Transform.hs
@@ -0,0 +1,584 @@
+-- |
+-- Module      : Streamly.Internal.Unicode.Utf8.Transform
+-- Copyright   : (c) 2021 Composewell Technologies
+-- License     : BSD-3-Clause
+-- Maintainer  : streamly@composewell.com
+-- Stability   : experimental
+-- Portability : GHC
+--
+module Streamly.Internal.Unicode.Utf8.Transform
+    (
+    -- * Transformations
+      map
+    , intercalate
+    , intersperse
+    , transpose
+    , reverse
+    , replace
+
+    -- ** Case conversion
+    -- $case
+    , toCaseFold
+    , toLower
+    , toTitle
+    , toUpper
+
+    -- ** Justification
+    , justifyLeft
+    , justifyRight
+    , center
+
+    -- * Construction
+
+    -- ** Scans
+    , scanl
+    , scanl1
+    , scanl'
+    , scanl1'
+    , scanr
+    , scanr1
+
+    -- * Searching
+    , filter
+
+    -- * Substrings
+
+    -- ** Breaking strings
+    , take
+    , takeEnd
+    , drop
+    , dropEnd
+    , takeWhile
+    , takeWhileEnd
+    , dropWhile
+    , dropWhileEnd
+    , dropAround
+    , strip
+    , stripStart
+    , stripEnd
+    )
+where
+
+#include "inline.hs"
+
+--------------------------------------------------------------------------------
+-- Imports
+--------------------------------------------------------------------------------
+
+import Data.Char (isSpace)
+
+import qualified Data.List as List
+import qualified Prelude
+import qualified Streamly.Internal.Data.Stream.IsStream as Stream
+
+import Streamly.Internal.Unicode.Utf8.Type
+
+import Prelude hiding
+    ( drop
+    , dropWhile
+    , filter
+    , map
+    , reverse
+    , scanl
+    , scanl1
+    , scanr
+    , scanr1
+    , take
+    , takeWhile
+    )
+
+-- $setup
+-- >>> :set -XOverloadedStrings
+-- >>> import qualified Streamly.Internal.Unicode.Utf8 as Utf8
+
+--------------------------------------------------------------------------------
+-- Transformations
+--------------------------------------------------------------------------------
+
+-- | 'map' @f@ @t@ is the 'Utf8' obtained by applying @f@ to
+-- each element of @t@.
+--
+-- Example:
+--
+-- >>> let message = Utf8.pack "I am not angry. Not at all."
+-- >>> Utf8.map (\c -> if c == '.' then '!' else c) message
+-- "I am not angry! Not at all!"
+--
+-- Performs replacement on invalid scalar values.
+--
+-- /Time complexity:/ O(n)
+{-# INLINE map #-}
+map :: (Char -> Char) -> Utf8 -> Utf8
+map f = fromStream . Stream.map f . toStream
+
+-- XXX Change >> to >>> after implementation
+-- | The 'intercalate' function takes a 'Utf8' and a list of
+-- 'Utf8's and concatenates the list after interspersing the first
+-- argument between each element of the list.
+--
+-- Example:
+--
+-- >> Utf8.intercalate "NI!" ["We", "seek", "the", "Holy", "Grail"]
+-- "WeNI!seekNI!theNI!HolyNI!Grail"
+--
+-- /Time complexity:/ O(n)
+--
+-- /Unimplemented/
+{-# INLINE intercalate #-}
+intercalate :: Utf8 -> [Utf8] -> Utf8
+intercalate = undefined
+
+-- XXX Change >> to >>> after implementation
+-- | The 'intersperse' function takes a character and places it
+-- between the characters of a 'Utf8'.
+--
+-- Example:
+--
+-- >> Utf8.intersperse '.' "SHIELD"
+-- "S.H.I.E.L.D"
+--
+-- Performs replacement on invalid scalar values.
+--
+-- /Time complexity:/ O(n)
+--
+-- /Unimplemented/
+{-# INLINE intersperse #-}
+intersperse :: Char -> Utf8 -> Utf8
+intersperse = undefined
+
+-- XXX Change >> to >>> after implementation
+-- | Reverse the characters of a string.
+--
+-- Example:
+--
+-- >> Utf8.reverse "desrever"
+-- "reversed"
+--
+--
+-- /Time complexity:/ O(n)
+{-# INLINE reverse #-}
+reverse :: Utf8 -> Utf8
+reverse = fromStream . Stream.reverse . toStream
+
+-- XXX Add time complexity after implementation
+-- XXX Change >> to >>> after implementation
+-- | Replace every non-overlapping occurrence of @needle@ in
+-- @haystack@ with @replacement@.
+--
+-- This function behaves as though it was defined as follows:
+--
+-- @
+-- replace needle replacement haystack =
+--   'intercalate' replacement ('splitOn' needle haystack)
+-- @
+--
+-- As this suggests, each occurrence is replaced exactly once.  So if
+-- @needle@ occurs in @replacement@, that occurrence will /not/ itself
+-- be replaced recursively:
+--
+-- >> replace "oo" "foo" "oo"
+-- "foo"
+--
+-- In cases where several instances of @needle@ overlap, only the
+-- first one will be replaced:
+--
+-- >> replace "ofo" "bar" "ofofo"
+-- "barfo"
+--
+-- /Unimplemented/
+{-# INLINE replace #-}
+replace :: Utf8
+        -- ^ @needle@ to search for.  If this string is empty, an
+        -- error will occur.
+        -> Utf8
+        -- ^ @replacement@ to replace @needle@ with.
+        -> Utf8
+        -- ^ @haystack@ in which to search.
+        -> Utf8
+replace = undefined
+
+--------------------------------------------------------------------------------
+-- Case conversions (folds)
+--------------------------------------------------------------------------------
+
+-- | Convert a string to folded case.
+--
+-- This function is mainly useful for performing caseless (also known
+-- as case insensitive) string comparisons.
+--
+-- A string @x@ is a caseless match for a string @y@ if and only if:
+--
+-- @toCaseFold x == toCaseFold y@
+--
+-- The result string may be longer than the input string, and may
+-- differ from applying 'toLower' to the input string.  For instance,
+-- the Armenian small ligature \"&#xfb13;\" (men now, U+FB13) is case
+-- folded to the sequence \"&#x574;\" (men, U+0574) followed by
+-- \"&#x576;\" (now, U+0576), while the Greek \"&#xb5;\" (micro sign,
+-- U+00B5) is case folded to \"&#x3bc;\" (small letter mu, U+03BC)
+-- instead of itself.
+--
+-- /Time complexity:/ O(n)
+--
+-- /Unimplemented/
+{-# INLINE toCaseFold #-}
+toCaseFold :: Utf8 -> Utf8
+toCaseFold = undefined
+
+-- | Convert a string to lower case, using simple case
+-- conversion.
+--
+-- The result string may be longer than the input string.  For
+-- instance, \"&#x130;\" (Latin capital letter I with dot above,
+-- U+0130) maps to the sequence \"i\" (Latin small letter i, U+0069)
+-- followed by \" &#x307;\" (combining dot above, U+0307).
+--
+-- /Time complexity:/ O(n)
+--
+-- /Unimplemented/
+{-# INLINE toLower #-}
+toLower :: Utf8 -> Utf8
+toLower = undefined
+
+-- | Convert a string to upper case, using simple case
+-- conversion.
+--
+-- The result string may be longer than the input string.  For
+-- instance, the German \"&#xdf;\" (eszett, U+00DF) maps to the
+-- two-letter sequence \"SS\".
+--
+-- /Time complexity:/ O(n)
+--
+-- /Unimplemented/
+{-# INLINE toUpper #-}
+toUpper :: Utf8 -> Utf8
+toUpper = undefined
+
+-- | Convert a 'Utf8' to title case, using simple case
+-- conversion.
+--
+-- The first letter of the input is converted to title case, as is
+-- every subsequent letter that immediately follows a non-letter.
+-- Every letter that immediately follows another letter is converted
+-- to lower case.
+--
+-- The result string may be longer than the input string. For example,
+-- the Latin small ligature &#xfb02; (U+FB02) is converted to the
+-- sequence Latin capital letter F (U+0046) followed by Latin small
+-- letter l (U+006C).
+--
+-- /Note/: this function does not take language or culture specific
+-- rules into account. For instance, in English, different style
+-- guides disagree on whether the book name \"The Hill of the Red
+-- Fox\" is correctly title cased&#x2014;but this function will
+-- capitalize /every/ word.
+--
+-- /Time complexity:/ O(n)
+--
+-- /Unimplemented/
+{-# INLINE toTitle #-}
+toTitle :: Utf8 -> Utf8
+toTitle = undefined
+
+-- XXX Change >> to >>> after implementation
+-- | Left-justify a string to the given length, using the
+-- specified fill character on the right.
+-- Performs replacement on invalid scalar values.
+--
+-- Examples:
+--
+-- >> justifyLeft 7 'x' "foo"
+-- "fooxxxx"
+--
+-- >> justifyLeft 3 'x' "foobar"
+-- "foobar"
+--
+-- /Time complexity:/ O(n)
+--
+-- /Unimplemented/
+{-# INLINE justifyLeft #-}
+justifyLeft :: Int -> Char -> Utf8 -> Utf8
+justifyLeft = undefined
+
+-- | Right-justify a string to the given length, using the
+-- specified fill character on the left.  Performs replacement on
+-- invalid scalar values.
+--
+-- Examples:
+--
+-- >> justifyRight 7 'x' "bar"
+-- "xxxxbar"
+--
+-- >> justifyRight 3 'x' "foobar"
+-- "foobar"
+--
+-- /Time complexity:/ O(n)
+--
+-- /Unimplemented/
+{-# INLINE justifyRight #-}
+justifyRight :: Int -> Char -> Utf8 -> Utf8
+justifyRight = undefined
+
+-- XXX Change >> to >>> after implementation
+-- | Center a string to the given length, using the specified
+-- fill character on either side.  Performs replacement on invalid
+-- scalar values.
+--
+-- Examples:
+--
+-- >> center 8 'x' "HS"
+-- "xxxHSxxx"
+--
+-- /Time complexity:/ O(n)
+--
+-- /Unimplemented/
+{-# INLINE center #-}
+center :: Int -> Char -> Utf8 -> Utf8
+center = undefined
+
+-- | The 'transpose' function transposes the rows and columns
+-- of its 'Utf8' argument.  Note that this function uses 'pack',
+-- 'unpack', and the list version of transpose, and is thus not very
+-- efficient.
+--
+-- Examples:
+--
+-- >>> transpose ["green","orange"]
+-- ["go","rr","ea","en","ng","e"]
+--
+-- >>> transpose ["blue","red"]
+-- ["br","le","ud","e"]
+--
+-- /Time complexity:/ O(n)
+{-# INLINE transpose #-}
+transpose :: [Utf8] -> [Utf8]
+transpose = Prelude.map pack . List.transpose . Prelude.map unpack
+
+--------------------------------------------------------------------------------
+-- Building 'Utf8's
+--------------------------------------------------------------------------------
+
+-- | 'scanl' is similar to 'foldl', but returns a list of
+-- successive reduced values from the left.
+-- Performs replacement on invalid scalar values.
+--
+-- > scanl f z [x1, x2, ...] == [z, z `f` x1, (z `f` x1) `f` x2, ...]
+--
+-- Note that
+--
+-- > last (scanl f z xs) == foldl f z xs.
+--
+-- /Time complexity:/ O(n)
+{-# INLINE scanl #-}
+scanl :: (Char -> Char -> Char) -> Char -> Utf8 -> Utf8
+scanl f z t = fromStream (Stream.scanl' f z (toStream t))
+
+-- | 'scanl1' is a variant of 'scanl' that has no starting
+-- value argument. Performs replacement on invalid scalar values.
+--
+-- > scanl1 f [x1, x2, ...] == [x1, x1 `f` x2, ...]
+--
+-- /Time complexity:/ O(n)
+{-# INLINE scanl1 #-}
+scanl1 :: (Char -> Char -> Char) -> Utf8 -> Utf8
+scanl1 f t = fromStream (Stream.scanl1' f (toStream t))
+
+-- | 'scanl'' is similar to 'foldl'', but returns a list of
+-- successive reduced values from the left.
+-- Performs replacement on invalid scalar values.
+--
+-- > scanl' f z [x1, x2, ...] == [z, z `f` x1, (z `f` x1) `f` x2, ...]
+--
+-- Note that
+--
+-- > last (scanl' f z xs) == foldl f z xs.
+--
+-- /Time complexity:/ O(n)
+{-# INLINE scanl' #-}
+scanl' :: (Char -> Char -> Char) -> Char -> Utf8 -> Utf8
+scanl' f z t = fromStream (Stream.scanl' f z (toStream t))
+
+-- | 'scanl1'' is a variant of 'scanl'' that has no starting
+-- value argument. Performs replacement on invalid scalar values.
+--
+-- > scanl1' f [x1, x2, ...] == [x1, x1 `f` x2, ...]
+--
+-- /Time complexity:/ O(n)
+{-# INLINE scanl1' #-}
+scanl1' :: (Char -> Char -> Char) -> Utf8 -> Utf8
+scanl1' f t = fromStream (Stream.scanl1' f (toStream t))
+
+-- | 'scanr' is the right-to-left dual of 'scanl'.  Performs
+-- replacement on invalid scalar values.
+--
+-- > scanr f v == reverse . scanl (flip f) v . reverse
+--
+-- /Time complexity:/ O(n)
+--
+-- /Unimplemented/
+{-# INLINE scanr #-}
+scanr :: (Char -> Char -> Char) -> Char -> Utf8 -> Utf8
+scanr = undefined
+
+-- | 'scanr1' is a variant of 'scanr' that has no starting
+-- value argument. Performs replacement on invalid scalar values.
+--
+-- /Time complexity:/ O(n)
+--
+-- /Unimplemented/
+{-# INLINE scanr1 #-}
+scanr1 :: (Char -> Char -> Char) -> Utf8 -> Utf8
+scanr1 = undefined
+
+--------------------------------------------------------------------------------
+-- Searching with a predicate
+--------------------------------------------------------------------------------
+
+-- | 'filter', applied to a predicate and a 'Utf8',
+-- returns a 'Utf8' containing those characters that satisfy the
+-- predicate.
+--
+-- /Time complexity:/ O(n)
+{-# INLINE filter #-}
+filter :: (Char -> Bool) -> Utf8 -> Utf8
+filter p t = fromStream (Stream.filter p (toStream t))
+
+--------------------------------------------------------------------------------
+-- Substrings
+--------------------------------------------------------------------------------
+
+-- | 'take' @n@, applied to a 'Utf8', returns the prefix of the
+-- 'Utf8' of length @n@, or the 'Utf8' itself if @n@ is greater than
+-- the length of the Utf8.
+--
+-- /Time complexity:/ O(n)
+{-# INLINE_NORMAL take #-}
+take :: Int -> Utf8 -> Utf8
+take n t = fromStream (Stream.take n (toStream t))
+
+-- | 'takeEnd' @n@ @t@ returns the suffix remaining after
+-- taking @n@ characters from the end of @t@.
+--
+-- Examples:
+--
+-- >> takeEnd 3 "foobar"
+-- "bar"
+--
+--
+-- /Time complexity:/ O(n)
+--
+-- /Unimplemented/
+{-# INLINE_NORMAL takeEnd #-}
+takeEnd :: Int -> Utf8 -> Utf8
+takeEnd = undefined
+
+-- | 'drop' @n@, applied to a 'Utf8', returns the suffix of the
+-- 'Utf8' after the first @n@ characters, or the empty 'Utf8' if @n@
+-- is greater than the length of the 'Utf8'.
+--
+-- /Time complexity:/ O(n)
+{-# INLINE_NORMAL drop #-}
+drop :: Int -> Utf8 -> Utf8
+drop n t = fromStream (Stream.drop n (toStream t))
+
+-- | 'dropEnd' @n@ @t@ returns the prefix remaining after
+-- dropping @n@ characters from the end of @t@.
+--
+-- Examples:
+--
+-- >> dropEnd 3 "foobar"
+-- "foo"
+--
+--
+-- /Time complexity:/ O(n)
+--
+-- /Unimplemented/
+{-# INLINE_NORMAL dropEnd #-}
+dropEnd :: Int -> Utf8 -> Utf8
+dropEnd = undefined
+
+-- | 'takeWhile', applied to a predicate @p@ and a 'Utf8',
+-- returns the longest prefix (possibly empty) of elements that
+-- satisfy @p@.
+--
+-- /Time complexity:/ O(n)
+{-# INLINE_NORMAL takeWhile #-}
+takeWhile :: (Char -> Bool) -> Utf8 -> Utf8
+takeWhile p t = fromStream (Stream.takeWhile p (toStream t))
+
+-- | 'takeWhileEnd', applied to a predicate @p@ and a 'Utf8',
+-- returns the longest suffix (possibly empty) of elements that
+-- satisfy @p@.
+-- Examples:
+--
+-- >> takeWhileEnd (=='o') "foo"
+-- "oo"
+--
+--
+-- /Time complexity:/ O(n)
+--
+-- /Unimplemented/
+{-# INLINE_NORMAL takeWhileEnd #-}
+takeWhileEnd :: (Char -> Bool) -> Utf8 -> Utf8
+takeWhileEnd = undefined
+
+-- | 'dropWhile' @p@ @t@ returns the suffix remaining after
+-- 'takeWhile' @p@ @t@.
+--
+-- /Time complexity:/ O(n)
+{-# INLINE_NORMAL dropWhile #-}
+dropWhile :: (Char -> Bool) -> Utf8 -> Utf8
+dropWhile p t = fromStream (Stream.dropWhile p (toStream t))
+
+-- | 'dropWhileEnd' @p@ @t@ returns the prefix remaining after
+-- dropping characters that satisfy the predicate @p@ from the end of
+-- @t@.
+--
+-- Examples:
+--
+-- >> dropWhileEnd (=='.') "foo..."
+-- "foo"
+--
+-- /Time complexity:/ O(n)
+--
+-- /Unimplemented/
+{-# INLINE_NORMAL dropWhileEnd #-}
+dropWhileEnd :: (Char -> Bool) -> Utf8 -> Utf8
+dropWhileEnd = undefined
+
+-- | 'dropAround' @p@ @t@ returns the substring remaining after
+-- dropping characters that satisfy the predicate @p@ from both the
+-- beginning and end of @t@.
+--
+-- /Time complexity:/ O(n)
+{-# INLINE_NORMAL dropAround #-}
+dropAround :: (Char -> Bool) -> Utf8 -> Utf8
+dropAround p = dropWhile p . dropWhileEnd p
+
+-- | Remove leading white space from a string.  Equivalent to:
+--
+-- > dropWhile isSpace
+--
+-- /Time complexity:/ O(n)
+{-# INLINE stripStart #-}
+stripStart :: Utf8 -> Utf8
+stripStart = dropWhile isSpace
+
+-- | Remove trailing white space from a string.  Equivalent to:
+--
+-- > dropWhileEnd isSpace
+--
+-- /Time complexity:/ O(n)
+{-# INLINE_NORMAL stripEnd #-}
+stripEnd :: Utf8 -> Utf8
+stripEnd = dropWhileEnd isSpace
+
+-- | Remove leading and trailing white space from a string.
+-- Equivalent to:
+--
+-- > dropAround isSpace
+--
+-- /Time complexity:/ O(n)
+{-# INLINE_NORMAL strip #-}
+strip :: Utf8 -> Utf8
+strip = dropAround isSpace
diff --git a/src/Streamly/Internal/Unicode/Utf8/Type.hs b/src/Streamly/Internal/Unicode/Utf8/Type.hs
new file mode 100644
index 0000000000..540b37304e
--- /dev/null
+++ b/src/Streamly/Internal/Unicode/Utf8/Type.hs
@@ -0,0 +1,422 @@
+-- |
+-- Module      : Streamly.Internal.Unicode.Utf8.Type
+-- Copyright   : (c) 2021 Composewell Technologies
+-- License     : BSD-3-Clause
+-- Maintainer  : streamly@composewell.com
+-- Stability   : experimental
+-- Portability : GHC
+--
+module Streamly.Internal.Unicode.Utf8.Type
+    (
+    -- * Type
+      Utf8
+    , toArray
+
+    -- * Creation and elimination
+    , empty
+    , singleton
+    , toStream
+    , fromStream
+    , pack
+    , unpack
+
+    -- * Basic interface
+    , cons
+    , snoc
+    , append
+    , uncons
+    , unsnoc
+    , head
+    , last
+    , tail
+    , init
+    , null
+
+    , isSingleton
+    , length
+    , compareLength
+
+    -- * Folds
+    , write
+
+    -- * Unfolds
+    , read
+    )
+where
+
+
+#include "inline.hs"
+
+--------------------------------------------------------------------------------
+-- Imports
+--------------------------------------------------------------------------------
+
+import Control.DeepSeq (NFData)
+import Control.Monad.IO.Class (MonadIO)
+import Data.Bifunctor (second)
+import Data.Bits (shiftR, (.&.))
+import Data.Char (ord)
+import Data.String (IsString(..))
+import Data.Word (Word8)
+import GHC.Base (assert)
+import GHC.IO.Encoding.Failure (isSurrogate)
+import Streamly.Internal.Data.Array.Foreign.Type (Array)
+import Streamly.Internal.Data.Fold (Fold)
+import Streamly.Internal.Data.Stream.IsStream (SerialT)
+import Streamly.Internal.Data.Unfold (Unfold)
+import System.IO.Unsafe (unsafePerformIO)
+
+import qualified Streamly.Internal.Data.Array.Foreign as Array
+import qualified Streamly.Internal.Data.Array.Foreign.Type as Array
+import qualified Streamly.Internal.Data.Array.Foreign.Mut.Type as MArray
+import qualified Streamly.Internal.Data.Fold.Type as Fold
+import qualified Streamly.Internal.Data.Stream.IsStream as Stream
+import qualified Streamly.Internal.Unicode.Stream as Unicode
+
+import Prelude hiding (head, init, last, length, null, read, tail)
+
+-- $setup
+-- >>> :set -XOverloadedStrings
+-- >>> import qualified Streamly.Internal.Unicode.Utf8 as Utf8
+
+--------------------------------------------------------------------------------
+-- Type
+--------------------------------------------------------------------------------
+
+-- | A space efficient, packed, unboxed Unicode container.
+newtype Utf8 =
+    Utf8 (Array Word8)
+    deriving (NFData)
+
+empty :: Utf8
+empty = Utf8 Array.nil
+
+--------------------------------------------------------------------------------
+-- Helpers
+--------------------------------------------------------------------------------
+
+{-# INLINE toStream #-}
+toStream :: Monad m => Utf8 -> SerialT m Char
+toStream = Unicode.decodeUtf8 . Array.toStream . toArray
+
+-- XXX Try removing IO
+-- XXX This would require rewriting Array.Foreign.write-ish functions. We can't
+-- use the underlying Array.Foreign.Mut.write-ish
+{-# INLINE fromStream #-}
+fromStream :: SerialT IO Char -> Utf8
+fromStream =
+    Utf8 . unsafePerformIO . Stream.fold Array.write . Unicode.encodeUtf8
+
+--------------------------------------------------------------------------------
+-- Conversion to/from 'Utf8'
+--------------------------------------------------------------------------------
+
+-- | Convert a 'Utf8' to a 'Array Word8'.
+--
+-- /Time complexity:/ O(1)
+{-# INLINE toArray #-}
+toArray :: Utf8 -> Array Word8
+toArray (Utf8 arr) = arr
+
+-- | Convert a 'String' into a 'Utf8'. Performs
+-- replacement on invalid scalar values.
+--
+-- /Time complexity:/ O(n)
+{-# INLINE_NORMAL pack #-}
+pack :: String -> Utf8
+pack = fromStream . Stream.fromList
+
+-- | Convert a 'Utf8' into a 'String'.
+--
+-- /Time complexity:/ O(n)
+{-# INLINE_NORMAL unpack #-}
+unpack :: Utf8 -> String
+unpack = unsafePerformIO . Stream.toList . toStream
+
+--------------------------------------------------------------------------------
+-- Instances
+--------------------------------------------------------------------------------
+
+instance IsString Utf8 where
+    {-# INLINE fromString #-}
+    fromString = pack
+
+instance Show Utf8 where
+    {-# INLINE showsPrec #-}
+    showsPrec p ps r = showsPrec p (unpack ps) r
+
+--------------------------------------------------------------------------------
+-- Streamly style APIs
+--------------------------------------------------------------------------------
+
+-- XXX From the review:
+--
+-- This should be implemented as an Unfold m Char Word8 composed with the input
+-- of Array.write. For that we would need to implement unfoldMany for folds:
+--
+-- > unfoldMany :: Unfold m a b -> Fold m b c -> Fold m a c
+--
+-- If we assume the argument fold to be a non-terminating then it should be easy
+-- to implement. That is do not handle the done case, just error out in the done
+-- case.
+--
+-- Once we have that then we can use:
+--
+-- > writeGeneric = Fold.unfoldMany readCharUtf8 A.write
+--
+-- For readCharUtf8 see https://github.com/composewell/streamly/pull/1055/files
+{-# INLINE write #-}
+write :: forall m. MonadIO m => Fold m Char Utf8
+write = Fold.Fold step initial (return . Utf8 . Array.unsafeFreeze)
+
+    where
+
+    -- XXX Start of with some specific size?
+    initial = return $ Fold.Partial MArray.nil
+
+    -- XXX snocExp over snoc?
+    step arr c =
+        case ord c of
+            x
+                | x <= 0x7F -> do
+                    arr1 <- arr `MArray.snoc` fromIntegral x
+                    return $ Fold.Partial arr1
+                | x <= 0x7FF -> do
+                    arr1 <- arr `snoc2` c
+                    return $ Fold.Partial arr1
+                | x <= 0xFFFF ->
+                    if isSurrogate c
+                    then Fold.Partial <$> snoc3_ arr 239 191 189
+                    else do
+                        arr1 <- arr `snoc3` c
+                        return $ Fold.Partial arr1
+                | otherwise -> do
+                    arr1 <- arr `snoc4` c
+                    return $ Fold.Partial arr1
+
+    {-# INLINE snoc2 #-}
+    snoc2 :: MArray.Array Word8 -> Char -> m (MArray.Array Word8)
+    snoc2 arr c =
+        assert (n >= 0x80 && n <= 0x07ff)
+            $ do
+                arr1 <- arr `MArray.snoc` x1
+                arr1 `MArray.snoc` x2
+
+        where
+
+        n = ord c
+        x1 = fromIntegral $ (n `shiftR` 6) + 0xC0
+        x2 = fromIntegral $ (n .&. 0x3F) + 0x80
+
+    {-# INLINE snoc3_ #-}
+    snoc3_ ::
+           MArray.Array Word8
+        -> Word8
+        -> Word8
+        -> Word8
+        -> m (MArray.Array Word8)
+    snoc3_ arr x1 x2 x3 = do
+        arr1 <- arr `MArray.snoc` x1
+        arr2 <- arr1 `MArray.snoc` x2
+        arr2 `MArray.snoc` x3
+
+    {-# INLINE snoc3 #-}
+    snoc3 :: MArray.Array Word8 -> Char -> m (MArray.Array Word8)
+    snoc3 arr c = assert (n >= 0x80 && n <= 0x07ff) (snoc3_ arr x1 x2 x3)
+
+        where
+
+        n = ord c
+        x1 = fromIntegral $ (n `shiftR` 12) + 0xE0
+        x2 = fromIntegral $ ((n `shiftR` 6) .&. 0x3F) + 0x80
+        x3 = fromIntegral $ (n .&. 0x3F) + 0x80
+
+    {-# INLINE snoc4 #-}
+    snoc4 :: MArray.Array Word8 -> Char -> m (MArray.Array Word8)
+    snoc4 arr c =
+        assert (n >= 0x80 && n <= 0x07ff)
+            $ do
+                arr1 <- arr `MArray.snoc` x1
+                arr2 <- arr1 `MArray.snoc` x2
+                arr3 <- arr2 `MArray.snoc` x3
+                arr3 `MArray.snoc` x4
+
+        where
+
+        n = ord c
+        x1 = fromIntegral $ (n `shiftR` 18) + 0xF0
+        x2 = fromIntegral $ ((n `shiftR` 12) .&. 0x3F) + 0x80
+        x3 = fromIntegral $ ((n `shiftR` 6) .&. 0x3F) + 0x80
+        x4 = fromIntegral $ (n .&. 0x3F) + 0x80
+
+-- XXX Similar to the comment on write, this should be implemented in terms of
+-- "foldMany" along with "writeCharUtf8"
+--
+-- foldMany :: Fold m b c -> Unfold m a b -> Unfold m a c
+-- writeCharUtf8 :: Fold m Word8 Char
+--
+-- read = foldMany Unicode.writeCharUtf8 Array.read
+--
+-- | Unfold a 'Utf8' into a stream of 'Char'.
+--
+-- /Unimplemented/
+{-# INLINE read #-}
+read :: Unfold m Utf8 Char
+read = undefined
+
+--------------------------------------------------------------------------------
+-- Basic functions
+--------------------------------------------------------------------------------
+
+-- | Produce a singleton 'Utf8'.
+--
+singleton :: Char -> Utf8
+singleton = fromStream . Stream.fromPure
+
+-- XXX From the review:
+--
+-- StreamD cons would be better here. And we should have a caveat that this
+-- function should be avoided to build a big array using this, so you should not
+-- be using foldr cons empty that would suck with StreamD cons. But an operation
+-- like x cons xs would work much better with StreamD cons compared to regular
+-- cons.
+--
+-- You can also memcpy if that turns out to be faster than stream.
+--
+-- | Adds a character to the front of a 'Utf8'. This function is more
+-- costly than its 'List' counterpart because it requires copying a new array.
+-- Performs replacement on invalid scalar values.
+--
+-- /Time complexity:/ O(n)
+{-# INLINE cons #-}
+cons :: Char -> Utf8 -> Utf8
+cons c = fromStream . Stream.cons c . toStream
+
+-- | Adds a character to the end of a 'Utf8'. Performs replacement
+-- on invalid scalar values.
+--
+-- /Time complexity:/ O(n)
+--
+-- /Unimplemented/
+{-# INLINE snoc #-}
+snoc :: Utf8 -> Char -> Utf8
+snoc = undefined
+
+-- | Appends one 'Utf8' to the other by copying both of them into a new
+-- 'Utf8'.
+--
+-- /Time complexity:/ O(n)
+{-# NOINLINE append #-}
+append :: Utf8 -> Utf8 -> Utf8
+append (Utf8 a) (Utf8 b) = Utf8 $ unsafePerformIO $ Array.splice a b
+
+-- | Returns the first character of a 'Utf8', or 'Nothing' if empty.
+--
+-- /Time complexity:/ O(1)
+{-# INLINE head #-}
+head :: Utf8 -> Maybe Char
+head = unsafePerformIO . Stream.head . toStream
+
+-- XXX From the review:
+--
+-- We can use a length fold and a single char decoding fold in parallel on the
+-- stream. Then we can use a array slice to get the tail of the array using the
+-- length returned by the length fold.
+--
+-- Alternatively, we could get the head char, find its encoded length and use
+-- that to slice the array.
+--
+-- We can use "writeCharUtf8" - once implemented - here. See the notes on
+-- "writeCharUtf8'" in "Unicode.Stream".
+--
+-- | Returns the first character and rest of a 'Utf8', or 'Nothing' if
+-- empty.
+--
+-- /Time complexity:/ O(1)
+{-# INLINE_NORMAL uncons #-}
+uncons :: Utf8 -> Maybe (Char, Utf8)
+uncons = fmap (second fromStream) . unsafePerformIO . Stream.uncons . toStream
+
+-- | Returns the last character of a 'Utf8', or 'Nothing' if empty.
+--
+-- /Time complexity:/ O(1)
+--
+-- /Unimplemented/
+{-# INLINE_NORMAL last #-}
+last :: Utf8 -> Char
+last = undefined
+
+-- | Returns all characters after the head of a 'Utf8', or 'Nothing' if
+-- empty.
+--
+-- /Time complexity:/ O(1)
+{-# INLINE_NORMAL tail #-}
+tail :: Utf8 -> Maybe Utf8
+tail = fmap snd . uncons
+
+-- XXX From the review
+--
+-- If we can write a routine to decode utf8 in reverse then we can just decode
+-- the last char from the end of the array and then slice it.
+--
+-- Otherwise, use last on the stream, get the encoded length of the last char
+-- and use that to slice it.
+-- | Returns all but the last character of a 'Utf8', or 'Nothing' if
+-- empty.
+--
+-- /Time complexity:/ O(1)
+{-# INLINE_NORMAL init #-}
+init :: Utf8 -> Maybe Utf8
+init = fmap fromStream . unsafePerformIO . Stream.init . toStream
+
+-- | Returns all but the last character and the last character of a
+-- 'Utf8', or 'Nothing' if empty.
+--
+-- /Time complexity:/ O(1)
+--
+-- /Unimplemented/
+{-# INLINE unsnoc #-}
+unsnoc :: Utf8 -> Maybe (Utf8, Char)
+unsnoc = undefined
+
+-- | Tests whether a 'Utf8' is empty or not.
+--
+-- /Time complexity:/ O(1)
+{-# INLINE null #-}
+null :: Utf8 -> Bool
+null = Array.null . toArray
+
+-- | Tests whether a 'Utf8' contains exactly one character.
+--
+-- /Time complexity:/ O(1)
+--
+-- /Unimplemented/
+{-# INLINE isSingleton #-}
+isSingleton :: Utf8 -> Bool
+isSingleton = undefined
+
+-- XXX From the review
+--
+-- We could possibly determine the length faster by using a custom routine that
+-- counts the starting chars from the utf8 encoded bytes without decoding the
+-- chars.
+--
+-- | Returns the number of characters in a 'Utf8'.
+--
+-- /Time complexity:/ O(n)
+{-# INLINE length #-}
+length :: Utf8 -> Int
+length = unsafePerformIO . Stream.length . toStream
+
+-- | Compare the count of characters in a 'Utf8' to a number.
+--
+-- This function gives the same answer as comparing against the result of
+-- 'length', but can short circuit if the count of characters is greater than
+-- the number, and hence be more efficient.
+--
+-- /Time complexity:/ O(n)
+--
+-- /Unimplemented/
+{-# INLINE_NORMAL compareLength #-}
+compareLength :: Utf8 -> Int -> Ordering
+compareLength = undefined
diff --git a/streamly.cabal b/streamly.cabal
index 2d6dadeb14..2222c7aca1 100644
--- a/streamly.cabal
+++ b/streamly.cabal
@@ -507,6 +507,11 @@ library
 
                      -- streamly-unicode
                      , Streamly.Internal.Unicode.Stream
+                     , Streamly.Internal.Unicode.Utf8.Type
+                     , Streamly.Internal.Unicode.Utf8.Transform
+                     , Streamly.Internal.Unicode.Utf8.Eliminate
+                     , Streamly.Internal.Unicode.Utf8.Generate
+                     , Streamly.Internal.Unicode.Utf8.Reduce
                      , Streamly.Internal.Unicode.Utf8
                      , Streamly.Internal.Unicode.Char
                      , Streamly.Internal.Unicode.Char.Parser