From 71166c04f66ecdd7fa915bc769f12d1c75f5ea2c Mon Sep 17 00:00:00 2001 From: Nicolas Di Prima Date: Sun, 21 Jan 2018 11:04:53 +0000 Subject: [PATCH 1/2] add CSV builder on top of Block Builder and String Builder --- Foundation/Format/CSV.hs | 231 ++++++++++++++++++++++++++++ basement/Basement/String/Builder.hs | 7 + foundation.cabal | 3 + tests/Checks.hs | 2 + tests/Test/Foundation/Format.hs | 14 ++ tests/Test/Foundation/Format/CSV.hs | 64 ++++++++ 6 files changed, 321 insertions(+) create mode 100644 Foundation/Format/CSV.hs create mode 100644 tests/Test/Foundation/Format.hs create mode 100644 tests/Test/Foundation/Format/CSV.hs diff --git a/Foundation/Format/CSV.hs b/Foundation/Format/CSV.hs new file mode 100644 index 00000000..76ff3bac --- /dev/null +++ b/Foundation/Format/CSV.hs @@ -0,0 +1,231 @@ +-- | +-- Module : Foundation.Format.CSV +-- License : BSD-style +-- Maintainer : Foundation +-- Stability : experimental +-- Portability : portable +-- +-- Provies the support for Comma Separated Value + +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE FlexibleInstances #-} + +module Foundation.Format.CSV + (-- * CSV + CSV + + -- ** Builder + -- ** String Bulider + , csvStringBuilder + , rowStringBuilder + , fieldStringBuilder + -- ** Block Builder + , csvBlockBuilder + , rowBlockBuilder + , fieldBlockBuilder + -- ** Conduit + , rowC + -- * Row + , Row + , ToRow(..) + -- * Field + , Field(..) + , Escaping(..) + , ToField(..) + -- ** helpers + , integral + , float + , string + ) where + +import Basement.Imports -- hiding (throw) +import Basement.BoxedArray (Array) +import Basement.NormalForm (NormalForm(..)) +import Basement.From (Into, into) +import Basement.String (String, replace, any, elem) +import qualified Basement.String as String (singleton) +import Basement.Types.Word128 (Word128) +import Basement.Types.Word256 (Word256) +import Basement.Types.OffsetSize (Offset, CountOf) +import Foundation.Collection.Element (Element) +import Foundation.Collection.Collection (Collection, nonEmpty_) +import Foundation.Collection.Sequential (Sequential(intersperse)) +import Foundation.Collection.Indexed (IndexedCollection) +import Foundation.Check.Arbitrary (Arbitrary(..), frequency) +import Foundation.Conduit.Internal + +import qualified Foundation.String.Builder as String +import Basement.Block (Block) +import qualified Basement.Block.Builder as Block + +import GHC.ST (runST) + +-- | CSV field +data Field + = FieldInteger Integer + | FieldDouble Double + | FieldString String Escaping + deriving (Eq, Show, Typeable) +instance NormalForm Field where + toNormalForm (FieldInteger i) = toNormalForm i + toNormalForm (FieldDouble d) = toNormalForm d + toNormalForm (FieldString s e) = toNormalForm s `seq` toNormalForm e +instance Arbitrary Field where + arbitrary = frequency $ nonEmpty_ [ (1, FieldInteger <$> arbitrary) + , (1, FieldDouble <$> arbitrary) + , (3, string <$> arbitrary) + ] + +data Escaping = NoEscape | Escape | DoubleEscape + deriving (Eq, Ord, Enum, Bounded, Show, Typeable) +instance NormalForm Escaping where + toNormalForm !_ = () + +class ToField a where + toField :: a -> Field +instance ToField Field where + toField = id +instance ToField a => ToField (Maybe a) where + toField Nothing = FieldString mempty NoEscape + toField (Just a) = toField a + +instance ToField Int8 where + toField = FieldInteger . into +instance ToField Int16 where + toField = FieldInteger . into +instance ToField Int32 where + toField = FieldInteger . into +instance ToField Int64 where + toField = FieldInteger . into +instance ToField Int where + toField = FieldInteger . into + +instance ToField Word8 where + toField = FieldInteger . into +instance ToField Word16 where + toField = FieldInteger . into +instance ToField Word32 where + toField = FieldInteger . into +instance ToField Word64 where + toField = FieldInteger . into +instance ToField Word where + toField = FieldInteger . into +instance ToField Word128 where + toField = FieldInteger . into +instance ToField Word256 where + toField = FieldInteger . into + +instance ToField Integer where + toField = FieldInteger +instance ToField Natural where + toField = FieldInteger . into + +instance ToField Double where + toField = FieldDouble + +instance ToField Char where + toField = string . String.singleton + +instance ToField (Offset a) where + toField = FieldInteger . into +instance ToField (CountOf a) where + toField = FieldInteger . into + +instance ToField [Char] where + toField = string . fromString +instance ToField String where + toField = string + +-- | helper function to create a `FieldInteger` +-- +integral :: Into Integer a => a -> Field +integral = FieldInteger . into + +float :: Double -> Field +float = FieldDouble + +-- | heler function to create a FieldString. +-- +-- This function will findout automatically if an escaping is needed. +-- if you wish to perform the escaping manually, do not used this function +-- +string :: String -> Field +string s = FieldString s encoding + where + encoding + | any g s = DoubleEscape + | any f s = Escape + | otherwise = NoEscape + f c = c == '\"' + g c = c `elem` ",\r\n" + +-- | CSV Row +-- +newtype Row = Row { unRow :: Array Field } + deriving (Eq, Show, Typeable, Monoid, Collection, NormalForm, Sequential, IndexedCollection) +type instance Element Row = Field +instance IsList Row where + type Item Row = Field + toList = toList . unRow + fromList = Row . fromList + +class ToRow a where + toRow :: a -> Row +instance ToRow Row where + toRow = id +instance (ToField a, ToField b) => ToRow (a,b) where + toRow (a,b) = fromList [toField a, toField b] +instance (ToField a, ToField b, ToField c) => ToRow (a,b,c) where + toRow (a,b,c) = fromList [toField a, toField b, toField c] +instance (ToField a, ToField b, ToField c, ToField d) => ToRow (a,b,c,d) where + toRow (a,b,c,d) = fromList [toField a, toField b, toField c, toField d] +instance (ToField a, ToField b, ToField c, ToField d, ToField e) => ToRow (a,b,c,d,e) where + toRow (a,b,c,d,e) = fromList [toField a, toField b, toField c, toField d, toField e] +instance (ToField a, ToField b, ToField c, ToField d, ToField e, ToField f) => ToRow (a,b,c,d,e,f) where + toRow (a,b,c,d,e,f) = fromList [toField a, toField b, toField c, toField d, toField e, toField f] + +-- | CSV Type +newtype CSV = CSV { unCSV :: Array Row } + deriving (Eq, Show, Typeable, Monoid, Collection, NormalForm, Sequential, IndexedCollection) + +type instance Element CSV = Row + +instance IsList CSV where + type Item CSV = Row + toList = toList . unCSV + fromList = CSV . fromList + +-- | serialise the CSV document into a UTF8 string +csvStringBuilder :: CSV -> String.Builder +csvStringBuilder = String.unsafeStringBuilder . csvBlockBuilder + +rowStringBuilder :: Row -> String.Builder +rowStringBuilder = String.unsafeStringBuilder . rowBlockBuilder + +fieldStringBuilder :: Field -> String.Builder +fieldStringBuilder = String.unsafeStringBuilder . fieldBlockBuilder + +-- | serialise the CSV document into a UTF8 encoded (Block Word8) +csvBlockBuilder :: CSV -> Block.Builder +csvBlockBuilder = mconcat . intersperse (Block.emitString "\r\n") . fmap rowBlockBuilder . toList . unCSV + +rowBlockBuilder :: Row -> Block.Builder +rowBlockBuilder = mconcat . intersperse (Block.emitUTF8Char ',') . fmap fieldBlockBuilder . toList . unRow + +fieldBlockBuilder :: Field -> Block.Builder +fieldBlockBuilder (FieldInteger i) = Block.emitString $ show i +fieldBlockBuilder (FieldDouble d) = Block.emitString $ show d +fieldBlockBuilder (FieldString s e) = case e of + NoEscape -> Block.emitString s + Escape -> Block.emitUTF8Char '"' <> Block.emitString s <> Block.emitUTF8Char '"' + DoubleEscape -> Block.emitUTF8Char '"' <> Block.emitString (replace "\"" "\"\"" s) <> Block.emitUTF8Char '"' + +rowC :: (ToRow row, Monad m) => Conduit row (Block Word8) m () +rowC = await >>= go + where + go Nothing = pure () + go (Just r) = + let bytes = runST (Block.run $ rowBlockBuilder (toRow r) <> Block.emitString "\r\n") + in yield bytes >> await >>= go diff --git a/basement/Basement/String/Builder.hs b/basement/Basement/String/Builder.hs index e0d2ad3f..562b49f2 100644 --- a/basement/Basement/String/Builder.hs +++ b/basement/Basement/String/Builder.hs @@ -15,6 +15,9 @@ module Basement.String.Builder -- * Emit functions , emit , emitChar + + -- * unsafe + , unsafeStringBuilder ) where @@ -30,6 +33,10 @@ import qualified Basement.UArray.Base as A newtype Builder = Builder Block.Builder deriving (Semigroup, Monoid) +unsafeStringBuilder :: Block.Builder -> Builder +unsafeStringBuilder = Builder +{-# INLINE unsafeStringBuilder #-} + run :: PrimMonad prim => Builder -> prim (String, Maybe ValidationFailure, UArray Word8) run (Builder builder) = do block <- Block.run builder diff --git a/foundation.cabal b/foundation.cabal index c63986cd..b8553a14 100644 --- a/foundation.cabal +++ b/foundation.cabal @@ -74,6 +74,7 @@ library Foundation.Conduit Foundation.Conduit.Textual Foundation.Exception + Foundation.Format.CSV Foundation.String Foundation.String.Read Foundation.String.Builder @@ -228,6 +229,8 @@ test-suite check-foundation Test.Data.List Test.Foundation.Network.IPv4 Test.Foundation.Network.IPv6 + Test.Foundation.Format + Test.Foundation.Format.CSV default-extensions: NoImplicitPrelude RebindableSyntax OverloadedStrings diff --git a/tests/Checks.hs b/tests/Checks.hs index 7a6478ef..68e35093 100644 --- a/tests/Checks.hs +++ b/tests/Checks.hs @@ -28,6 +28,7 @@ import Test.Foundation.Network.IPv4 import Test.Foundation.Network.IPv6 import Test.Foundation.String.Base64 import Test.Checks.Property.Collection +import Test.Foundation.Format import qualified Test.Foundation.Bits as Bits #if MIN_VERSION_base(4,9,0) @@ -204,4 +205,5 @@ main = defaultMain $ Group "foundation" #if MIN_VERSION_base(4,9,0) , testBlockN #endif + , testFormat ] diff --git a/tests/Test/Foundation/Format.hs b/tests/Test/Foundation/Format.hs new file mode 100644 index 00000000..3547ed5c --- /dev/null +++ b/tests/Test/Foundation/Format.hs @@ -0,0 +1,14 @@ +{-# LANGUAGE OverloadedStrings #-} +module Test.Foundation.Format + ( testFormat + ) where + +import Foundation +import Foundation.Check +import Test.Foundation.Format.CSV + + +testFormat :: Test +testFormat = Group "Format" + [ testFormatCSV + ] diff --git a/tests/Test/Foundation/Format/CSV.hs b/tests/Test/Foundation/Format/CSV.hs new file mode 100644 index 00000000..a2496207 --- /dev/null +++ b/tests/Test/Foundation/Format/CSV.hs @@ -0,0 +1,64 @@ +{-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} +module Test.Foundation.Format.CSV + ( testFormatCSV + ) where + +import Foundation +import Foundation.Format.CSV +import Foundation.Check +import Foundation.String.Builder (toString) + +testFormatCSV :: Test +testFormatCSV = Group "CSV" + [ Group "field unit tests" $ testFieldEncoding <$> fieldUnitTests + , Group "row unit tests" $ testRowEncoding <$> rowUnitTests + ] + + where + testFieldEncoding (f,r) = Property (show f) $ + let str = toString (fieldStringBuilder f) + in r === str + testRowEncoding (row,result) = Property (show row) $ + let str = toString (rowStringBuilder row) + in result === str + +fieldUnitTests :: [(Field, String)] +fieldUnitTests = + [ (FieldInteger 42, "42") + , (FieldDouble 1, "1.0") + , (FieldDouble 0.000001, "1.0e-6") + , (FieldString "String" NoEscape, "String") + , (string "String", "String") + , (string "with comma,string", "\"with comma,string\"") + , (FieldString "multiline\nstring" Escape, "\"multiline\nstring\"") + , (FieldString "piece of 12\" by 23\"" DoubleEscape, "\"piece of 12\"\" by 23\"\"\"") + , (string "supported sizes are: 12\", 13\" and 14\"", "\"supported sizes are: 12\"\", 13\"\" and 14\"\"\"") + ] + +rowUnitTests :: [(Row, String)] +rowUnitTests = + [ (fromList [toField (42 :: Int), toField ("some string" :: String)], "42,some string") + , (toRow (42 :: Int, "some string" :: String), "42,some string") + , ( toRow ( 42 :: Int + , "some string" :: String + , "supported sizes are: 12\", 13\" and 14\"" :: String + ) + , "42,some string,\"supported sizes are: 12\"\", 13\"\" and 14\"\"\"" + ) + , ( toRow ( 42 :: Int + , "some string" :: String + , "supported sizes are: 12\", 13\" and 14\"" :: String + , Just 0.000001 :: Maybe Double + ) + , "42,some string,\"supported sizes are: 12\"\", 13\"\" and 14\"\"\",1.0e-6" + ) + , ( toRow ( 42 :: Int + , "some string" :: String + , "supported sizes are: 12\", 13\" and 14\"" :: String + , Just 0.000001 :: Maybe Double + , Nothing :: Maybe Char + ) + , "42,some string,\"supported sizes are: 12\"\", 13\"\" and 14\"\"\",1.0e-6," + ) + ] From 8bc0b7ac8be3df27ef6eb2b27067ffac90aad97c Mon Sep 17 00:00:00 2001 From: Nicolas Di Prima Date: Sun, 21 Jan 2018 19:56:51 +0000 Subject: [PATCH 2/2] fix primitive compat functions --- basement/Basement/Compat/Primitive.hs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/basement/Basement/Compat/Primitive.hs b/basement/Basement/Compat/Primitive.hs index 2b3f6064..1bc286cb 100644 --- a/basement/Basement/Compat/Primitive.hs +++ b/basement/Basement/Compat/Primitive.hs @@ -140,7 +140,7 @@ compatShrinkMutableByteArray# mba i s = compatShrinkMutableByteArray# src i s = -- not check whether i is smaller than the size of the buffer case newAlignedPinnedByteArray# i 8# s of { (# s2, dst #) -> - case copyMutableByteArray# dst 0# src 0# i s2 of { s3 -> (# s3, dst #) }} + case copyMutableByteArray# src 0# dst 0# i s2 of { s3 -> (# s3, dst #) }} #endif {-# INLINE compatShrinkMutableByteArray# #-} @@ -151,7 +151,7 @@ compatResizeMutableByteArray# mba i s = resizeMutableByteArray# mba i s #else compatResizeMutableByteArray# src i s = case newAlignedPinnedByteArray# i 8# s of { (# s2, dst #) -> - case copyMutableByteArray# dst 0# src 0# nbBytes s2 of { s3 -> (# s3, dst #) }} + case copyMutableByteArray# src 0# dst 0# nbBytes s2 of { s3 -> (# s3, dst #) }} where isGrow = bool# (i ># len) nbBytes