Skip to content
This repository was archived by the owner on Sep 20, 2023. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
231 changes: 231 additions & 0 deletions Foundation/Format/CSV.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,231 @@
-- |
-- Module : Foundation.Format.CSV
-- License : BSD-style
-- Maintainer : Foundation
-- Stability : experimental
-- Portability : portable
--
-- Provies the support for Comma Separated Value

{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE FlexibleInstances #-}

module Foundation.Format.CSV
(-- * CSV
CSV

-- ** Builder
-- ** String Bulider
, csvStringBuilder
, rowStringBuilder
, fieldStringBuilder
-- ** Block Builder
, csvBlockBuilder
, rowBlockBuilder
, fieldBlockBuilder
-- ** Conduit
, rowC
-- * Row
, Row
, ToRow(..)
-- * Field
, Field(..)
, Escaping(..)
, ToField(..)
-- ** helpers
, integral
, float
, string
) where

import Basement.Imports -- hiding (throw)
import Basement.BoxedArray (Array)
import Basement.NormalForm (NormalForm(..))
import Basement.From (Into, into)
import Basement.String (String, replace, any, elem)
import qualified Basement.String as String (singleton)
import Basement.Types.Word128 (Word128)
import Basement.Types.Word256 (Word256)
import Basement.Types.OffsetSize (Offset, CountOf)
import Foundation.Collection.Element (Element)
import Foundation.Collection.Collection (Collection, nonEmpty_)
import Foundation.Collection.Sequential (Sequential(intersperse))
import Foundation.Collection.Indexed (IndexedCollection)
import Foundation.Check.Arbitrary (Arbitrary(..), frequency)
import Foundation.Conduit.Internal

import qualified Foundation.String.Builder as String
import Basement.Block (Block)
import qualified Basement.Block.Builder as Block

import GHC.ST (runST)

-- | CSV field
data Field
= FieldInteger Integer
| FieldDouble Double
| FieldString String Escaping
deriving (Eq, Show, Typeable)
instance NormalForm Field where
toNormalForm (FieldInteger i) = toNormalForm i
toNormalForm (FieldDouble d) = toNormalForm d
toNormalForm (FieldString s e) = toNormalForm s `seq` toNormalForm e
instance Arbitrary Field where
arbitrary = frequency $ nonEmpty_ [ (1, FieldInteger <$> arbitrary)
, (1, FieldDouble <$> arbitrary)
, (3, string <$> arbitrary)
]

data Escaping = NoEscape | Escape | DoubleEscape
deriving (Eq, Ord, Enum, Bounded, Show, Typeable)
instance NormalForm Escaping where
toNormalForm !_ = ()

class ToField a where
toField :: a -> Field
instance ToField Field where
toField = id
instance ToField a => ToField (Maybe a) where
toField Nothing = FieldString mempty NoEscape
toField (Just a) = toField a

instance ToField Int8 where
toField = FieldInteger . into
instance ToField Int16 where
toField = FieldInteger . into
instance ToField Int32 where
toField = FieldInteger . into
instance ToField Int64 where
toField = FieldInteger . into
instance ToField Int where
toField = FieldInteger . into

instance ToField Word8 where
toField = FieldInteger . into
instance ToField Word16 where
toField = FieldInteger . into
instance ToField Word32 where
toField = FieldInteger . into
instance ToField Word64 where
toField = FieldInteger . into
instance ToField Word where
toField = FieldInteger . into
instance ToField Word128 where
toField = FieldInteger . into
instance ToField Word256 where
toField = FieldInteger . into

instance ToField Integer where
toField = FieldInteger
instance ToField Natural where
toField = FieldInteger . into

instance ToField Double where
toField = FieldDouble

instance ToField Char where
toField = string . String.singleton

instance ToField (Offset a) where
toField = FieldInteger . into
instance ToField (CountOf a) where
toField = FieldInteger . into

instance ToField [Char] where
toField = string . fromString
instance ToField String where
toField = string

-- | helper function to create a `FieldInteger`
--
integral :: Into Integer a => a -> Field
integral = FieldInteger . into

float :: Double -> Field
float = FieldDouble

-- | heler function to create a FieldString.
--
-- This function will findout automatically if an escaping is needed.
-- if you wish to perform the escaping manually, do not used this function
--
string :: String -> Field
string s = FieldString s encoding
where
encoding
| any g s = DoubleEscape
| any f s = Escape
| otherwise = NoEscape
f c = c == '\"'
g c = c `elem` ",\r\n"

-- | CSV Row
--
newtype Row = Row { unRow :: Array Field }
deriving (Eq, Show, Typeable, Monoid, Collection, NormalForm, Sequential, IndexedCollection)
type instance Element Row = Field
instance IsList Row where
type Item Row = Field
toList = toList . unRow
fromList = Row . fromList

class ToRow a where
toRow :: a -> Row
instance ToRow Row where
toRow = id
instance (ToField a, ToField b) => ToRow (a,b) where
toRow (a,b) = fromList [toField a, toField b]
instance (ToField a, ToField b, ToField c) => ToRow (a,b,c) where
toRow (a,b,c) = fromList [toField a, toField b, toField c]
instance (ToField a, ToField b, ToField c, ToField d) => ToRow (a,b,c,d) where
toRow (a,b,c,d) = fromList [toField a, toField b, toField c, toField d]
instance (ToField a, ToField b, ToField c, ToField d, ToField e) => ToRow (a,b,c,d,e) where
toRow (a,b,c,d,e) = fromList [toField a, toField b, toField c, toField d, toField e]
instance (ToField a, ToField b, ToField c, ToField d, ToField e, ToField f) => ToRow (a,b,c,d,e,f) where
toRow (a,b,c,d,e,f) = fromList [toField a, toField b, toField c, toField d, toField e, toField f]

-- | CSV Type
newtype CSV = CSV { unCSV :: Array Row }
deriving (Eq, Show, Typeable, Monoid, Collection, NormalForm, Sequential, IndexedCollection)

type instance Element CSV = Row

instance IsList CSV where
type Item CSV = Row
toList = toList . unCSV
fromList = CSV . fromList

-- | serialise the CSV document into a UTF8 string
csvStringBuilder :: CSV -> String.Builder
csvStringBuilder = String.unsafeStringBuilder . csvBlockBuilder

rowStringBuilder :: Row -> String.Builder
rowStringBuilder = String.unsafeStringBuilder . rowBlockBuilder

fieldStringBuilder :: Field -> String.Builder
fieldStringBuilder = String.unsafeStringBuilder . fieldBlockBuilder

-- | serialise the CSV document into a UTF8 encoded (Block Word8)
csvBlockBuilder :: CSV -> Block.Builder
csvBlockBuilder = mconcat . intersperse (Block.emitString "\r\n") . fmap rowBlockBuilder . toList . unCSV

rowBlockBuilder :: Row -> Block.Builder
rowBlockBuilder = mconcat . intersperse (Block.emitUTF8Char ',') . fmap fieldBlockBuilder . toList . unRow

fieldBlockBuilder :: Field -> Block.Builder
fieldBlockBuilder (FieldInteger i) = Block.emitString $ show i
fieldBlockBuilder (FieldDouble d) = Block.emitString $ show d
fieldBlockBuilder (FieldString s e) = case e of
NoEscape -> Block.emitString s
Escape -> Block.emitUTF8Char '"' <> Block.emitString s <> Block.emitUTF8Char '"'
DoubleEscape -> Block.emitUTF8Char '"' <> Block.emitString (replace "\"" "\"\"" s) <> Block.emitUTF8Char '"'

rowC :: (ToRow row, Monad m) => Conduit row (Block Word8) m ()
rowC = await >>= go
where
go Nothing = pure ()
go (Just r) =
let bytes = runST (Block.run $ rowBlockBuilder (toRow r) <> Block.emitString "\r\n")
in yield bytes >> await >>= go
4 changes: 2 additions & 2 deletions basement/Basement/Compat/Primitive.hs
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ compatShrinkMutableByteArray# mba i s =
compatShrinkMutableByteArray# src i s =
-- not check whether i is smaller than the size of the buffer
case newAlignedPinnedByteArray# i 8# s of { (# s2, dst #) ->
case copyMutableByteArray# dst 0# src 0# i s2 of { s3 -> (# s3, dst #) }}
case copyMutableByteArray# src 0# dst 0# i s2 of { s3 -> (# s3, dst #) }}
#endif
{-# INLINE compatShrinkMutableByteArray# #-}

Expand All @@ -151,7 +151,7 @@ compatResizeMutableByteArray# mba i s = resizeMutableByteArray# mba i s
#else
compatResizeMutableByteArray# src i s =
case newAlignedPinnedByteArray# i 8# s of { (# s2, dst #) ->
case copyMutableByteArray# dst 0# src 0# nbBytes s2 of { s3 -> (# s3, dst #) }}
case copyMutableByteArray# src 0# dst 0# nbBytes s2 of { s3 -> (# s3, dst #) }}
where
isGrow = bool# (i ># len)
nbBytes
Expand Down
7 changes: 7 additions & 0 deletions basement/Basement/String/Builder.hs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@ module Basement.String.Builder
-- * Emit functions
, emit
, emitChar

-- * unsafe
, unsafeStringBuilder
) where


Expand All @@ -30,6 +33,10 @@ import qualified Basement.UArray.Base as A
newtype Builder = Builder Block.Builder
deriving (Semigroup, Monoid)

unsafeStringBuilder :: Block.Builder -> Builder
unsafeStringBuilder = Builder
{-# INLINE unsafeStringBuilder #-}

run :: PrimMonad prim => Builder -> prim (String, Maybe ValidationFailure, UArray Word8)
run (Builder builder) = do
block <- Block.run builder
Expand Down
3 changes: 3 additions & 0 deletions foundation.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ library
Foundation.Conduit
Foundation.Conduit.Textual
Foundation.Exception
Foundation.Format.CSV
Foundation.String
Foundation.String.Read
Foundation.String.Builder
Expand Down Expand Up @@ -228,6 +229,8 @@ test-suite check-foundation
Test.Data.List
Test.Foundation.Network.IPv4
Test.Foundation.Network.IPv6
Test.Foundation.Format
Test.Foundation.Format.CSV
default-extensions: NoImplicitPrelude
RebindableSyntax
OverloadedStrings
Expand Down
2 changes: 2 additions & 0 deletions tests/Checks.hs
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import Test.Foundation.Network.IPv4
import Test.Foundation.Network.IPv6
import Test.Foundation.String.Base64
import Test.Checks.Property.Collection
import Test.Foundation.Format
import qualified Test.Foundation.Bits as Bits

#if MIN_VERSION_base(4,9,0)
Expand Down Expand Up @@ -204,4 +205,5 @@ main = defaultMain $ Group "foundation"
#if MIN_VERSION_base(4,9,0)
, testBlockN
#endif
, testFormat
]
14 changes: 14 additions & 0 deletions tests/Test/Foundation/Format.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
{-# LANGUAGE OverloadedStrings #-}
module Test.Foundation.Format
( testFormat
) where

import Foundation
import Foundation.Check
import Test.Foundation.Format.CSV


testFormat :: Test
testFormat = Group "Format"
[ testFormatCSV
]
64 changes: 64 additions & 0 deletions tests/Test/Foundation/Format/CSV.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
module Test.Foundation.Format.CSV
( testFormatCSV
) where

import Foundation
import Foundation.Format.CSV
import Foundation.Check
import Foundation.String.Builder (toString)

testFormatCSV :: Test
testFormatCSV = Group "CSV"
[ Group "field unit tests" $ testFieldEncoding <$> fieldUnitTests
, Group "row unit tests" $ testRowEncoding <$> rowUnitTests
]

where
testFieldEncoding (f,r) = Property (show f) $
let str = toString (fieldStringBuilder f)
in r === str
testRowEncoding (row,result) = Property (show row) $
let str = toString (rowStringBuilder row)
in result === str

fieldUnitTests :: [(Field, String)]
fieldUnitTests =
[ (FieldInteger 42, "42")
, (FieldDouble 1, "1.0")
, (FieldDouble 0.000001, "1.0e-6")
, (FieldString "String" NoEscape, "String")
, (string "String", "String")
, (string "with comma,string", "\"with comma,string\"")
, (FieldString "multiline\nstring" Escape, "\"multiline\nstring\"")
, (FieldString "piece of 12\" by 23\"" DoubleEscape, "\"piece of 12\"\" by 23\"\"\"")
, (string "supported sizes are: 12\", 13\" and 14\"", "\"supported sizes are: 12\"\", 13\"\" and 14\"\"\"")
]

rowUnitTests :: [(Row, String)]
rowUnitTests =
[ (fromList [toField (42 :: Int), toField ("some string" :: String)], "42,some string")
, (toRow (42 :: Int, "some string" :: String), "42,some string")
, ( toRow ( 42 :: Int
, "some string" :: String
, "supported sizes are: 12\", 13\" and 14\"" :: String
)
, "42,some string,\"supported sizes are: 12\"\", 13\"\" and 14\"\"\""
)
, ( toRow ( 42 :: Int
, "some string" :: String
, "supported sizes are: 12\", 13\" and 14\"" :: String
, Just 0.000001 :: Maybe Double
)
, "42,some string,\"supported sizes are: 12\"\", 13\"\" and 14\"\"\",1.0e-6"
)
, ( toRow ( 42 :: Int
, "some string" :: String
, "supported sizes are: 12\", 13\" and 14\"" :: String
, Just 0.000001 :: Maybe Double
, Nothing :: Maybe Char
)
, "42,some string,\"supported sizes are: 12\"\", 13\"\" and 14\"\"\",1.0e-6,"
)
]