From 54dcf53083f65f2213b0e3e686e4671ae398e137 Mon Sep 17 00:00:00 2001 From: Nicholas Clarke Date: Thu, 4 Jan 2024 11:26:23 +0100 Subject: [PATCH 1/5] Update the Shelley CDDL file to be self contained. This makes it much easier to use for testing. --- example/cddl-files/basic_assign.cddl | 6 ++++++ example/cddl-files/shelley.cddl | 30 ++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/example/cddl-files/basic_assign.cddl b/example/cddl-files/basic_assign.cddl index c2b9edf..a845572 100644 --- a/example/cddl-files/basic_assign.cddl +++ b/example/cddl-files/basic_assign.cddl @@ -7,3 +7,9 @@ header = , test : coin / null , withComment : null ; This is a comment ] + +header_body = [ + issuer : text +] + +$kes_signature = bytes .size 32 diff --git a/example/cddl-files/shelley.cddl b/example/cddl-files/shelley.cddl index 7d139b4..ae73fb9 100644 --- a/example/cddl-files/shelley.cddl +++ b/example/cddl-files/shelley.cddl @@ -256,3 +256,33 @@ metadata_hash = $hash32 scripthash = $hash28 $nonce /= [ 0 // 1, bytes .size 32 ] + +$hash28 /= bytes .size 28 +$hash32 /= bytes .size 32 + +$vkey /= bytes .size 32 + +$vrf_vkey /= bytes .size 32 +$vrf_cert /= [bytes, bytes .size 80] + +$kes_vkey /= bytes .size 32 +$kes_signature /= bytes .size 448 +signkeyKES = bytes .size 64 + +$signature /= bytes .size 64 + +finite_set = [* a] + +;unit_interval = #6.30([uint, uint]) +unit_interval = #6.30([1, 2]) + ; real unit_interval is: #6.30([uint, uint]) + ; but this produces numbers outside the unit interval + ; and can also produce a zero in the denominator + +rational = #6.30([uint, uint]) + +set = [* a] + +address = bytes + +reward_account = bytes From 5f1745f176a1ef2bfa418522f94fc1d94db1f605 Mon Sep 17 00:00:00 2001 From: Nicholas Clarke Date: Thu, 4 Jan 2024 11:26:51 +0100 Subject: [PATCH 2/5] Use cabal-fmt to format the cabal file --- flake.nix | 1 + 1 file changed, 1 insertion(+) diff --git a/flake.nix b/flake.nix index 6b8bb97..ad8b02f 100644 --- a/flake.nix +++ b/flake.nix @@ -32,6 +32,7 @@ ormolu pkgs.haskell.compiler.${ghcver} cabal-install + cabal-fmt ]; enterShell = '' From 7ec6c6d50f0f0674dda84d0d4b065dd09f0774e1 Mon Sep 17 00:00:00 2001 From: Nicholas Clarke Date: Thu, 4 Jan 2024 11:28:28 +0100 Subject: [PATCH 3/5] Parse control operators correctly. Though at the moment we don't use them, this should make it easier to support in future. --- src/Codec/CBOR/Cuddle/Parser.hs | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/src/Codec/CBOR/Cuddle/Parser.hs b/src/Codec/CBOR/Cuddle/Parser.hs index 5d4c2b6..b81d15e 100644 --- a/src/Codec/CBOR/Cuddle/Parser.hs +++ b/src/Codec/CBOR/Cuddle/Parser.hs @@ -3,7 +3,8 @@ module Codec.CBOR.Cuddle.Parser where import Codec.CBOR.Cuddle.CDDL -import Control.Applicative (Applicative (liftA2)) +import Codec.CBOR.Cuddle.CDDL.CtlOp (CtlOp) +import Codec.CBOR.Cuddle.CDDL.CtlOp qualified as COp import Control.Applicative.Combinators.NonEmpty qualified as NE import Data.Functor (void, ($>)) import Data.List.NonEmpty (NonEmpty) @@ -146,12 +147,31 @@ pTyOp :: Parser TyOp pTyOp = choice [ try $ RangeOp <$> pRangeBound, - CtrlOp <$> (char '.' *> pName) + CtrlOp <$> (char '.' *> pCtlOp) ] where pRangeBound :: Parser RangeBound pRangeBound = (string ".." $> Closed) <|> (string ".." $> ClOpen) + pCtlOp :: Parser CtlOp + pCtlOp = + choice + [ try $ string "cbor" $> COp.Cbor, + try $ string "size" $> COp.Size, + try $ string "bits" $> COp.Bits, + try $ string "cborseq" $> COp.Cborseq, + try $ string "within" $> COp.Within, + try $ string "and" $> COp.And, + try $ string "lt" $> COp.Lt, + try $ string "le" $> COp.Le, + try $ string "gt" $> COp.Gt, + try $ string "ge" $> COp.Ge, + try $ string "eq" $> COp.Eq, + try $ string "ne" $> COp.Ne, + try $ string "default" $> COp.Default, + try $ string "regexp" $> COp.Regexp + ] + pOccur :: Parser OccurrenceIndicator pOccur = choice @@ -218,7 +238,7 @@ charInRange lb ub x = lb <= x && x <= ub -- | A variant of 'optional' for composite parsers, which will consume no input -- if it fails. -optcomp :: MonadParsec e s f => f a -> f (Maybe a) +optcomp :: (MonadParsec e s f) => f a -> f (Maybe a) optcomp = optional . try {- @@ -335,7 +355,7 @@ RFC 8610 CDDL June 2019 -} -- | Variant on 'NE.sepEndBy1' which doesn't consume the separator -sepBy1' :: MonadParsec e s m => m a -> m sep -> m (NonEmpty a) +sepBy1' :: (MonadParsec e s m) => m a -> m sep -> m (NonEmpty a) sepBy1' p sep = NE.fromList <$> go where go = liftA2 (:) p (many (try $ sep *> p)) From 3c4306dfde474408349edfb6ef29ccf8597c8011 Mon Sep 17 00:00:00 2001 From: Nicholas Clarke Date: Thu, 4 Jan 2024 11:31:34 +0100 Subject: [PATCH 4/5] CDDL resolution. This commit does a number of things related to "compiling" the parsed CDDL into something easier to work with for the purposes of CBOR generation and validation: - Introduce the 'CTree' abstration in src/Codec/CBOR/Cuddle/CDDL/CTree.hs. This is a flattened, simplified structure which we use to represent CDDL in an easier-to-manipulate fashion. - Introduce in src/Codec/CBOR/Cuddle/CDDL/Resolve.hs various compilation passes which implement: - Merging assignments - Building a CTree - Resolving the Postlude - Monomorphisation In addition, we make a few unrelated changes: - Move the 'Postlude' module to sit in CDDL/Postlude. - Format the cabal file with cabal-fmt. --- cuddle.cabal | 80 +-- example/Main.hs | 18 + src/Codec/CBOR/Cuddle/CDDL.hs | 111 +++- src/Codec/CBOR/Cuddle/CDDL/CTree.hs | 89 ++++ src/Codec/CBOR/Cuddle/CDDL/CtlOp.hs | 7 +- src/Codec/CBOR/Cuddle/{ => CDDL}/Postlude.hs | 11 +- src/Codec/CBOR/Cuddle/CDDL/Resolve.hs | 524 +++++++++++++++++++ 7 files changed, 782 insertions(+), 58 deletions(-) create mode 100644 src/Codec/CBOR/Cuddle/CDDL/CTree.hs rename src/Codec/CBOR/Cuddle/{ => CDDL}/Postlude.hs (86%) create mode 100644 src/Codec/CBOR/Cuddle/CDDL/Resolve.hs diff --git a/cuddle.cabal b/cuddle.cabal index bd55ad2..476a068 100644 --- a/cuddle.cabal +++ b/cuddle.cabal @@ -1,16 +1,19 @@ -cabal-version: 3.4 -name: cuddle -version: 0.1.0.0 -synopsis: CDDL Generator and test utilities +cabal-version: 3.4 +name: cuddle +version: 0.1.0.0 +synopsis: CDDL Generator and test utilities + -- description: -license: Apache-2.0 -license-file: LICENSE -author: IOG Ledger Team -maintainer: nicholas.clarke@iohk.io +license: Apache-2.0 +license-file: LICENSE +author: IOG Ledger Team +maintainer: nicholas.clarke@iohk.io + -- copyright: -category: Codec -build-type: Simple -extra-doc-files: CHANGELOG.md +category: Codec +build-type: Simple +extra-doc-files: CHANGELOG.md + -- extra-source-files: common warnings @@ -22,48 +25,65 @@ library Codec.CBOR.Cuddle.Builder Codec.CBOR.Cuddle.CDDL Codec.CBOR.Cuddle.CDDL.CtlOp + Codec.CBOR.Cuddle.CDDL.CTree + Codec.CBOR.Cuddle.CDDL.Postlude + Codec.CBOR.Cuddle.CDDL.Resolve Codec.CBOR.Cuddle.Parser Codec.CBOR.Cuddle.Pretty + other-modules: + -- other-extensions: build-depends: - base ^>=4.16.3.0, - bytestring, - cborg, - megaparsec, - parser-combinators, - prettyprinter, - text + , base ^>=4.16.3.0 || ^>=4.18.1.0 + , bytestring + , capability + , cborg + , containers + , generic-optics + , hashable + , megaparsec + , mtl + , mutable-containers + , optics-core + , parser-combinators + , prettyprinter + , random + , text + hs-source-dirs: src default-language: GHC2021 executable example import: warnings default-language: GHC2021 + -- other-modules: -- other-extensions: hs-source-dirs: example main-is: Main.hs build-depends: - base ^>=4.16.3.0, - cuddle, - megaparsec, - prettyprinter, - text + , base ^>=4.16.3.0 || ^>=4.18.1.0 + , cuddle + , megaparsec + , prettyprinter + , random + , text test-suite cuddle-test import: warnings default-language: GHC2021 + -- other-modules: -- other-extensions: type: exitcode-stdio-1.0 hs-source-dirs: test main-is: Main.hs build-depends: - base ^>=4.16.3.0, - cuddle, - hspec, - hspec-megaparsec, - megaparsec, - prettyprinter, - text + , base ^>=4.16.3.0 || ^>=4.18.1.0 + , cuddle + , hspec + , hspec-megaparsec + , megaparsec + , prettyprinter + , text diff --git a/example/Main.hs b/example/Main.hs index 010280f..b76644f 100644 --- a/example/Main.hs +++ b/example/Main.hs @@ -3,6 +3,8 @@ module Main (main) where +import Codec.CBOR.Cuddle.CDDL (Name (..)) +import Codec.CBOR.Cuddle.CDDL.Resolve (asMap, buildMonoCTree, buildRefCTree, buildResolvedCTree) import Codec.CBOR.Cuddle.Parser (pCDDL) import Codec.CBOR.Cuddle.Pretty () import Data.Text qualified as T @@ -22,6 +24,22 @@ main = do Right res -> do print res putDocW 80 $ pretty res + putStrLn "\n" + putStrLn "--------------------------------------------------------------------------------" + putStrLn " As a CTree" + putStrLn "--------------------------------------------------------------------------------" + let refCTree = buildRefCTree (asMap res) + print refCTree + putStrLn "--------------------------------------------------------------------------------" + putStrLn " After name resolution" + putStrLn "--------------------------------------------------------------------------------" + let resolvedCTree = buildResolvedCTree refCTree + print resolvedCTree + putStrLn "--------------------------------------------------------------------------------" + putStrLn " After monomorphisation" + putStrLn "--------------------------------------------------------------------------------" + let monoCTree = buildMonoCTree <$> resolvedCTree + print monoCTree _ -> putStrLn "Expected filename" parseFromFile :: diff --git a/src/Codec/CBOR/Cuddle/CDDL.hs b/src/Codec/CBOR/Cuddle/CDDL.hs index 46bf39d..6ca8d16 100644 --- a/src/Codec/CBOR/Cuddle/CDDL.hs +++ b/src/Codec/CBOR/Cuddle/CDDL.hs @@ -2,12 +2,15 @@ -- https://datatracker.ietf.org/doc/rfc8610/ module Codec.CBOR.Cuddle.CDDL where +import Codec.CBOR.Cuddle.CDDL.CtlOp (CtlOp) import Data.ByteString qualified as B +import Data.Hashable (Hashable) import Data.List.NonEmpty qualified as NE import Data.Text qualified as T +import GHC.Generics (Generic) newtype CDDL = CDDL (NE.NonEmpty Rule) - deriving (Eq, Show) + deriving (Eq, Generic, Show) -- | -- A name can consist of any of the characters from the set {"A" to @@ -33,7 +36,9 @@ newtype CDDL = CDDL (NE.NonEmpty Rule) -- * Rule names (types or groups) do not appear in the actual CBOR -- encoding, but names used as "barewords" in member keys do. newtype Name = Name T.Text - deriving (Eq, Show) + deriving (Eq, Generic, Ord, Show) + +instance Hashable Name -- | -- assignt = "=" / "/=" @@ -49,7 +54,7 @@ newtype Name = Name T.Text -- a rule name that has not yet been defined; this makes the right-hand -- side the first entry in the choice being created.) data Assign = AssignEq | AssignExt - deriving (Eq, Show) + deriving (Eq, Generic, Show) -- | -- Generics @@ -68,10 +73,10 @@ data Assign = AssignEq | AssignExt -- Generic rules can be used for establishing names for both types and -- groups. newtype GenericParam = GenericParam (NE.NonEmpty Name) - deriving (Eq, Show, Semigroup) + deriving (Eq, Generic, Show, Semigroup) newtype GenericArg = GenericArg (NE.NonEmpty Type1) - deriving (Eq, Show, Semigroup) + deriving (Eq, Generic, Show, Semigroup) -- | -- rule = typename [genericparm] S assignt S type @@ -97,7 +102,7 @@ newtype GenericArg = GenericArg (NE.NonEmpty Type1) -- this semantic processing may need to span several levels of rule -- definitions before a determination can be made.) data Rule = Rule Name (Maybe GenericParam) Assign TypeOrGroup - deriving (Eq, Show) + deriving (Eq, Generic, Show) -- | -- A range operator can be used to join two type expressions that stand @@ -106,25 +111,83 @@ data Rule = Rule Name (Maybe GenericParam) Assign TypeOrGroup -- value is always included in the matching set and the second value is -- included for ".." and excluded for "...". data RangeBound = ClOpen | Closed - deriving (Eq, Show) + deriving (Eq, Generic, Show) + +instance Hashable RangeBound -data TyOp = RangeOp RangeBound | CtrlOp Name - deriving (Eq, Show) +data TyOp = RangeOp RangeBound | CtrlOp CtlOp + deriving (Eq, Generic, Show) data TypeOrGroup = TOGType Type0 | TOGGroup GroupEntry - deriving (Eq, Show) + deriving (Eq, Generic, Show) + +{-- | + The group that is used to define a map or an array can often be reused in the + definition of another map or array. Similarly, a type defined as a tag + carries an internal data item that one would like to refer to. In these + cases, it is expedient to simply use the name of the map, array, or tag type + as a handle for the group or type defined inside it. + + The "unwrap" operator (written by preceding a name by a tilde character "~") + can be used to strip the type defined for a name by one layer, exposing the + underlying group (for maps and arrays) or type (for tags). + + For example, an application might want to define a basic header and an + advanced header. Without unwrapping, this might be done as follows: + + basic-header-group = ( + field1: int, + field2: text, + ) + + basic-header = [ basic-header-group ] + + advanced-header = [ + basic-header-group, + field3: bytes, + field4: number, ; as in the tagged type "time" + ] + + Unwrapping simplifies this to: + + basic-header = [ + field1: int, + field2: text, + ] + + advanced-header = [ + ~basic-header, + field3: bytes, + field4: ~time, + ] + + (Note that leaving out the first unwrap operator in the latter example would + lead to nesting the basic-header in its own array inside the advanced-header, + while, with the unwrapped basic-header, the definition of the group inside + basic-header is essentially repeated inside advanced-header, leading to a + single array. This can be used for various applications often solved by + inheritance in programming languages. The effect of unwrapping can also be + described as "threading in" the group or type inside the referenced type, + which suggested the thread-like "~" character.) +-} +unwrap :: TypeOrGroup -> Maybe Group +unwrap (TOGType (Type0 ((Type1 t2 Nothing) NE.:| []))) = case t2 of + T2Map g -> Just g + T2Array g -> Just g + _ -> Nothing +unwrap _ = Nothing -- | -- A type can be given as a choice between one or more types. The -- choice matches a data item if the data item matches any one of the -- types given in the choice. newtype Type0 = Type0 (NE.NonEmpty Type1) - deriving (Eq, Show, Semigroup) + deriving (Eq, Generic, Show, Semigroup) -- | -- Two types can be combined with a range operator (see below) data Type1 = Type1 Type2 (Maybe (TyOp, Type2)) - deriving (Eq, Show) + deriving (Eq, Generic, Show) data Type2 = -- | A type can be just a single value (such as 1 or "icecream" or @@ -160,13 +223,7 @@ data Type2 T2DataItem Int (Maybe Int) | -- | Any data item T2Any - deriving (Eq, Show) - -mkType :: Type2 -> Type0 -mkType t = Type0 $ NE.singleton $ Type1 t Nothing - -mkTypeRange :: Type2 -> Type2 -> RangeBound -> Type0 -mkTypeRange t t' rb = Type0 $ NE.singleton $ Type1 t (Just (RangeOp rb, t')) + deriving (Eq, Generic, Show) -- | -- An optional _occurrence_ indicator can be given in front of a group @@ -187,13 +244,15 @@ data OccurrenceIndicator | OIZeroOrMore | OIOneOrMore | OIBounded (Maybe Int) (Maybe Int) - deriving (Eq, Show) + deriving (Eq, Generic, Show) + +instance Hashable OccurrenceIndicator -- | -- A group matches any sequence of key/value pairs that matches any of -- the choices given (again using PEG semantics). newtype Group = Group (NE.NonEmpty GrpChoice) - deriving (Eq, Show, Semigroup) + deriving (Eq, Generic, Show, Semigroup) type GrpChoice = [GroupEntry] @@ -208,7 +267,7 @@ data GroupEntry = GEType (Maybe OccurrenceIndicator) (Maybe MemberKey) Type0 | GERef (Maybe OccurrenceIndicator) Name (Maybe GenericArg) | GEGroup (Maybe OccurrenceIndicator) Group - deriving (Eq, Show) + deriving (Eq, Generic, Show) -- | -- Key types can be given by a type expression, a bareword (which stands @@ -222,14 +281,16 @@ data MemberKey = MKType Type1 | MKBareword Name | MKValue Value - deriving (Eq, Show) + deriving (Eq, Generic, Show) data Value = -- Should be bigger than just Int VNum Int | VText T.Text | VBytes B.ByteString - deriving (Eq, Show) + deriving (Eq, Generic, Show) + +instance Hashable Value newtype Comment = Comment T.Text - deriving (Eq, Show) + deriving (Eq, Generic, Show) diff --git a/src/Codec/CBOR/Cuddle/CDDL/CTree.hs b/src/Codec/CBOR/Cuddle/CDDL/CTree.hs new file mode 100644 index 0000000..525be6d --- /dev/null +++ b/src/Codec/CBOR/Cuddle/CDDL/CTree.hs @@ -0,0 +1,89 @@ +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE NoFieldSelectors #-} + +module Codec.CBOR.Cuddle.CDDL.CTree where + +import Codec.CBOR.Cuddle.CDDL + ( Name, + OccurrenceIndicator, + RangeBound, + Value, + ) +import Codec.CBOR.Cuddle.CDDL.CtlOp +import Codec.CBOR.Cuddle.CDDL.Postlude (PTerm) +import Data.Hashable (Hashable) +import Data.List.NonEmpty qualified as NE +import Data.Map.Strict qualified as Map +import GHC.Generics (Generic) + +-------------------------------------------------------------------------------- + +-- * Resolved CDDL Tree + +-- +-- This is a simplified representation of CDDL. It is technically more general - +-- that is, the structure can represent invalid CDDL - but is in that way easier +-- to manipulate. +-------------------------------------------------------------------------------- + +-- | CDDL Tree, parametrised over a functor +-- +-- We principally use this functor to represent references - thus, every 'f a' +-- may be either an a or a reference to another CTree. +data CTree f + = Literal Value + | Postlude PTerm + | Map [Node f] + | Array [Node f] + | Choice (NE.NonEmpty (Node f)) + | Group [Node f] + | KV {key :: Node f, value :: Node f, cut :: Bool} + | Occur {item :: Node f, occurs :: OccurrenceIndicator} + | Range {from :: Node f, to :: Node f, inclusive :: RangeBound} + | Control {op :: CtlOp, target :: Node f, controller :: Node f} + | Enum (Node f) + | Unwrap (Node f) + deriving (Generic) + +-- | Traverse the CTree, carrying out the given operation at each node +traverseCTree :: (Monad m) => (Node f -> m (Node g)) -> CTree f -> m (CTree g) +traverseCTree _ (Literal a) = pure $ Literal a +traverseCTree _ (Postlude a) = pure $ Postlude a +traverseCTree atNode (Map xs) = Map <$> traverse atNode xs +traverseCTree atNode (Array xs) = Array <$> traverse atNode xs +traverseCTree atNode (Group xs) = Group <$> traverse atNode xs +traverseCTree atNode (Choice xs) = Choice <$> traverse atNode xs +traverseCTree atNode (KV k v cut) = do + k' <- atNode k + v' <- atNode v + pure $ KV k' v' cut +traverseCTree atNode (Occur i occ) = flip Occur occ <$> atNode i +traverseCTree atNode (Range f t inc) = do + f' <- atNode f + t' <- atNode t + pure $ Range f' t' inc +traverseCTree atNode (Control op t c) = do + t' <- atNode t + c' <- atNode c + pure $ Control op t' c' +traverseCTree atNode (Enum ref) = Enum <$> atNode ref +traverseCTree atNode (Unwrap ref) = Unwrap <$> atNode ref + +type Node f = f (CTree f) + +newtype CTreeRoot' poly f + = CTreeRoot + (Map.Map Name (poly (Node f))) + deriving (Generic) + +type CTreeRoot f = CTreeRoot' (ParametrisedWith [Name]) f + +data ParametrisedWith w a + = Unparametrised {underlying :: a} + | Parametrised + { underlying :: a, + params :: w + } + deriving (Eq, Functor, Generic, Foldable, Traversable, Show) + +instance (Hashable w, Hashable a) => Hashable (ParametrisedWith w a) diff --git a/src/Codec/CBOR/Cuddle/CDDL/CtlOp.hs b/src/Codec/CBOR/Cuddle/CDDL/CtlOp.hs index cb1c478..4065498 100644 --- a/src/Codec/CBOR/Cuddle/CDDL/CtlOp.hs +++ b/src/Codec/CBOR/Cuddle/CDDL/CtlOp.hs @@ -1,5 +1,8 @@ module Codec.CBOR.Cuddle.CDDL.CtlOp where +import Data.Hashable (Hashable) +import GHC.Generics (Generic) + -- | A _control_ allows relating a _target_ type with a _controller_ type -- via a _control operator_. @@ -27,4 +30,6 @@ data CtlOp | Eq | Ne | Default - deriving (Eq, Show) + deriving (Eq, Generic, Show) + +instance Hashable CtlOp diff --git a/src/Codec/CBOR/Cuddle/Postlude.hs b/src/Codec/CBOR/Cuddle/CDDL/Postlude.hs similarity index 86% rename from src/Codec/CBOR/Cuddle/Postlude.hs rename to src/Codec/CBOR/Cuddle/CDDL/Postlude.hs index 9db56a0..8d2a8c3 100644 --- a/src/Codec/CBOR/Cuddle/Postlude.hs +++ b/src/Codec/CBOR/Cuddle/CDDL/Postlude.hs @@ -1,4 +1,7 @@ -module Codec.CBOR.Cuddle.Postlude where +module Codec.CBOR.Cuddle.CDDL.Postlude where + +import Data.Hashable (Hashable) +import GHC.Generics (Generic) -- | -- @@ -41,7 +44,11 @@ data PTerm | PTInt | PTHalf | PTFloat - | PTDboule + | PTDouble | PTBytes | PTText | PTAny + | PTNil + deriving (Eq, Generic, Ord, Show) + +instance Hashable PTerm diff --git a/src/Codec/CBOR/Cuddle/CDDL/Resolve.hs b/src/Codec/CBOR/Cuddle/CDDL/Resolve.hs new file mode 100644 index 0000000..842dbda --- /dev/null +++ b/src/Codec/CBOR/Cuddle/CDDL/Resolve.hs @@ -0,0 +1,524 @@ +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE DerivingVia #-} +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE OverloadedRecordDot #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE UndecidableInstances #-} +{-# LANGUAGE ViewPatterns #-} +{-# LANGUAGE NoFieldSelectors #-} + +-- | Module containing tools for 'resolving' CDDL +-- +-- Resolving the CDDL is a process of simplifying the representation to make +-- further operations, such as CBOR generation or validation, easier. We operate +-- with a number of passes: +-- +-- 1. First, we deal with any rule extensions and create a single map from +-- identifiers to (potentially parametrised) entities. +-- 2. Second, we flatten the structure to a 'CTree', which discards a lot of the +-- extrenuous information. +-- 3. Then we resolve identifiers. Specifically, we do three things: +-- - Resolve identifiers that map to the postlude. +-- - Differentiate between generic args and references to top-level rules. +-- - Validate that all references exist. Note that we cannot resolve all +-- references since they may be circular. +-- 4. Finally, we monomorphise, synthesizing instances of rules with their +-- generic arguments bound. +module Codec.CBOR.Cuddle.CDDL.Resolve + ( buildResolvedCTree, + monoCTree, + buildRefCTree, + asMap, + buildMonoCTree, + MonoRef (..), + ) +where + +import Capability.Accessors (Field (..), Lift (..)) +import Capability.Error (HasThrow, MonadError (..), throw) +import Capability.Reader (HasReader, MonadReader (..), ask, local) +import Capability.Sink (HasSink) +import Capability.Source (HasSource) +import Capability.State (HasState, MonadState (..), modify) +import Codec.CBOR.Cuddle.CDDL +import Codec.CBOR.Cuddle.CDDL.CTree + ( CTree, + CTreeRoot, + CTreeRoot' (CTreeRoot), + ParametrisedWith (..), + ) +import Codec.CBOR.Cuddle.CDDL.CTree qualified as CTree +import Codec.CBOR.Cuddle.CDDL.Postlude (PTerm (..)) +import Control.Monad.Except (ExceptT (..), runExceptT) +import Control.Monad.Reader (Reader, ReaderT (..), runReader) +import Control.Monad.State.Strict (StateT (..)) +import Data.Foldable (foldl') +import Data.Functor.Identity (Identity (..)) +import Data.Generics.Product +import Data.Generics.Sum +import Data.Hashable +import Data.List.NonEmpty qualified as NE +import Data.Map.Strict qualified as Map +import Data.Text qualified as T +import GHC.Generics (Generic) +import Optics.Core + +-------------------------------------------------------------------------------- +-- 1. Rule extensions +-------------------------------------------------------------------------------- + +type CDDLMap = Map.Map Name (Parametrised TypeOrGroup) + +type Parametrised a = ParametrisedWith [Name] a + +toParametrised :: a -> Maybe GenericParam -> Parametrised a +toParametrised a Nothing = Unparametrised a +toParametrised a (Just (GenericParam gps)) = Parametrised a (NE.toList gps) + +parameters :: Parametrised a -> [Name] +parameters (Unparametrised _) = mempty +parameters (Parametrised _ ps) = ps + +asMap :: CDDL -> CDDLMap +asMap (CDDL rules) = foldl' assignOrExtend Map.empty rules + where + assignOrExtend :: CDDLMap -> Rule -> CDDLMap + assignOrExtend m (Rule n gps assign tog) = case assign of + -- Equals assignment + AssignEq -> Map.insert n (toParametrised tog gps) m + AssignExt -> Map.alter (extend tog gps) n m + + extend :: + TypeOrGroup -> + Maybe GenericParam -> + Maybe (Parametrised TypeOrGroup) -> + Maybe (Parametrised TypeOrGroup) + extend tog _gps (Just existing) = case (existing.underlying, tog) of + (TOGType _, TOGType (Type0 new)) -> + Just $ + existing + & field @"underlying" + % _Ctor @"TOGType" + % _Ctor @"Type0" + %~ (`NE.append` new) + -- From the CDDL spec, I don't see how one is meant to extend a group. + -- According to the description, it's meant to add a group choice, but the + -- assignment to a group takes a 'GrpEntry', not a Group, and there is no + -- ability to add a choice. For now, we simply ignore attempt at + -- extension. + (TOGGroup _, TOGGroup _new) -> Just existing + (TOGType _, _) -> error "Attempting to extend a type with a group" + (TOGGroup _, _) -> error "Attempting to extend a group with a type" + extend tog gps Nothing = Just $ toParametrised tog gps + +-------------------------------------------------------------------------------- +-- 2. Conversion to CTree +-------------------------------------------------------------------------------- + +-- | Indicates that an item may be referenced rather than defined. +data OrRef a + = -- | The item is inlined directly + It a + | -- | Reference to another node with possible generic arguments supplied + Ref Name [CTree.Node OrRef] + deriving (Show, Functor) + +type RefCTree = CTreeRoot OrRef + +deriving instance Show (CTree OrRef) + +deriving instance Show (CTreeRoot OrRef) + +-- | Build a CTree incorporating references. +-- +-- This translation cannot fail. +buildRefCTree :: CDDLMap -> RefCTree +buildRefCTree rules = CTreeRoot $ fmap toCTreeRule rules + where + toCTreeRule :: + Parametrised TypeOrGroup -> + ParametrisedWith [Name] (CTree.Node OrRef) + toCTreeRule = fmap toCTreeTOG + + toCTreeTOG :: TypeOrGroup -> CTree.Node OrRef + toCTreeTOG (TOGType t0) = toCTreeT0 t0 + toCTreeTOG (TOGGroup ge) = toCTreeGroupEntry ge + + toCTreeT0 :: Type0 -> CTree.Node OrRef + toCTreeT0 (Type0 (t1 NE.:| [])) = toCTreeT1 t1 + toCTreeT0 (Type0 xs) = It . CTree.Choice $ toCTreeT1 <$> xs + + toCTreeT1 :: Type1 -> CTree.Node OrRef + toCTreeT1 (Type1 t2 Nothing) = toCTreeT2 t2 + toCTreeT1 (Type1 t2 (Just (op, t2'))) = case op of + RangeOp bound -> + It $ + CTree.Range + { CTree.from = toCTreeT2 t2, + CTree.to = toCTreeT2 t2', + CTree.inclusive = bound + } + CtrlOp ctlop -> + It $ + CTree.Control + { CTree.op = ctlop, + CTree.target = toCTreeT2 t2, + CTree.controller = toCTreeT2 t2' + } + + toCTreeT2 :: Type2 -> CTree.Node OrRef + toCTreeT2 (T2Value v) = It $ CTree.Literal v + toCTreeT2 (T2Name n garg) = + Ref n (fromGenArgs garg) + toCTreeT2 (T2Group t0) = + -- This behaviour seems questionable, but I don't really see how better to + -- interpret the spec here. + toCTreeT0 t0 + toCTreeT2 (T2Map g) = toCTreeMap g + toCTreeT2 (T2Array g) = toCTreeArray g + toCTreeT2 (T2Unwrapped n margs) = + It . CTree.Unwrap $ + Ref n (fromGenArgs margs) + toCTreeT2 (T2Enum g) = toCTreeEnum g + toCTreeT2 (T2EnumRef n margs) = Ref n $ fromGenArgs margs + toCTreeT2 (T2Tag _mint t0) = + -- Currently not validating tags + toCTreeT0 t0 + toCTreeT2 (T2DataItem _maj _mmin) = + -- We don't validate numerical items yet + It $ CTree.Postlude PTAny + toCTreeT2 T2Any = It $ CTree.Postlude PTAny + + toCTreeGroupEntry :: GroupEntry -> CTree.Node OrRef + toCTreeGroupEntry (GEType (Just occi) mmkey t0) = + It $ + CTree.Occur + { CTree.item = toKVPair mmkey t0, + CTree.occurs = occi + } + toCTreeGroupEntry (GEType Nothing mmkey t0) = toKVPair mmkey t0 + toCTreeGroupEntry (GERef (Just occi) n margs) = + It $ + CTree.Occur + { CTree.item = Ref n (fromGenArgs margs), + CTree.occurs = occi + } + toCTreeGroupEntry (GERef Nothing n margs) = Ref n (fromGenArgs margs) + toCTreeGroupEntry (GEGroup (Just occi) g) = + It $ + CTree.Occur + { CTree.item = groupToGroup g, + CTree.occurs = occi + } + toCTreeGroupEntry (GEGroup Nothing g) = groupToGroup g + + fromGenArgs :: Maybe GenericArg -> [CTree.Node OrRef] + fromGenArgs = maybe [] (\(GenericArg xs) -> NE.toList $ fmap toCTreeT1 xs) + + -- Interpret a group as an enumeration. Note that we float out the + -- choice options + toCTreeEnum :: Group -> CTree.Node OrRef + toCTreeEnum (Group (a NE.:| [])) = + It . CTree.Enum . It . CTree.Group $ fmap toCTreeGroupEntry a + toCTreeEnum (Group xs) = + It . CTree.Choice $ + fmap (It . CTree.Enum . It . CTree.Group . fmap toCTreeGroupEntry) xs + + -- Embed a group in another group, again floating out the choice options + groupToGroup :: Group -> CTree.Node OrRef + groupToGroup (Group (a NE.:| [])) = + It . CTree.Group $ fmap toCTreeGroupEntry a + groupToGroup (Group xs) = + It . CTree.Choice $ + fmap (It . CTree.Group . fmap toCTreeGroupEntry) xs + + toKVPair :: Maybe MemberKey -> Type0 -> CTree.Node OrRef + toKVPair Nothing t0 = toCTreeT0 t0 + toKVPair (Just mkey) t0 = + It $ + CTree.KV + { CTree.key = toCTreeMemberKey mkey, + CTree.value = toCTreeT0 t0, + -- TODO Handle cut semantics + CTree.cut = False + } + + -- Interpret a group as a map. Note that we float out the choice options + toCTreeMap :: Group -> CTree.Node OrRef + toCTreeMap (Group (a NE.:| [])) = It . CTree.Map $ fmap toCTreeGroupEntry a + toCTreeMap (Group xs) = + It + . CTree.Choice + $ fmap (It . CTree.Map . fmap toCTreeGroupEntry) xs + + -- Interpret a group as an array. Note that we float out the choice + -- options + toCTreeArray :: Group -> CTree.Node OrRef + toCTreeArray (Group (a NE.:| [])) = + It . CTree.Array $ fmap toCTreeGroupEntry a + toCTreeArray (Group xs) = + It . CTree.Choice $ + fmap (It . CTree.Array . fmap toCTreeGroupEntry) xs + + toCTreeMemberKey :: MemberKey -> CTree.Node OrRef + toCTreeMemberKey (MKValue v) = It $ CTree.Literal v + toCTreeMemberKey (MKBareword (Name n)) = It $ CTree.Literal (VText n) + toCTreeMemberKey (MKType t1) = toCTreeT1 t1 + +-------------------------------------------------------------------------------- +-- 3. Name resolution +-------------------------------------------------------------------------------- + +data NameResolutionFailure + = UnboundReference Name + | MismatchingArgs Name [Name] + | ArgsToPostlude PTerm [CTree.Node OrRef] + deriving (Show) + +postludeBinding :: Map.Map Name PTerm +postludeBinding = + Map.fromList + [ (Name "bool", PTBool), + (Name "uint", PTUInt), + (Name "nint", PTNInt), + (Name "int", PTInt), + (Name "half", PTHalf), + (Name "float", PTFloat), + (Name "double", PTDouble), + (Name "bytes", PTBytes), + (Name "bstr", PTBytes), + (Name "text", PTText), + (Name "tstr", PTText), + (Name "any", PTAny), + (Name "nil", PTNil), + (Name "null", PTNil) + ] + +data BindingEnv poly f = BindingEnv + { -- | Global name bindings via 'RuleDef' + global :: Map.Map Name (poly (CTree.Node f)), + -- | Local bindings for generic parameters + local :: Map.Map Name (CTree.Node f) + } + deriving (Generic) + +data DistRef a + = DIt a + | -- | Reference to a generic parameter + GenericRef Name + | -- | Reference to a rule definition, possibly with generic arguments + RuleRef Name [CTree.Node DistRef] + deriving (Eq, Generic, Functor, Show) + +instance (Hashable a) => Hashable (DistRef a) + +deriving instance Show (CTree DistRef) + +deriving instance Eq (CTree DistRef) + +instance Hashable (CTree DistRef) + +deriving instance Show (CTreeRoot DistRef) + +deriving instance Eq (CTreeRoot DistRef) + +instance Hashable (CTreeRoot DistRef) + +resolveRef :: + BindingEnv (ParametrisedWith [Name]) OrRef -> + CTree.Node OrRef -> + Either NameResolutionFailure (DistRef (CTree DistRef)) +resolveRef env (It a) = DIt <$> resolveCTree env a +resolveRef env (Ref n args) = case Map.lookup n postludeBinding of + Just pterm -> case args of + [] -> Right . DIt $ CTree.Postlude pterm + xs -> Left $ ArgsToPostlude pterm xs + Nothing -> case Map.lookup n env.global of + Just (parameters -> params) -> + if length params == length args + then + let localBinds = Map.fromList $ zip params args + newEnv = env & field @"local" %~ Map.union localBinds + in RuleRef n <$> traverse (resolveRef newEnv) args + else Left $ MismatchingArgs n params + Nothing -> case Map.lookup n env.local of + Just _ -> Right $ GenericRef n + Nothing -> Left $ UnboundReference n + +resolveCTree :: + BindingEnv (ParametrisedWith [Name]) OrRef -> + CTree OrRef -> + Either NameResolutionFailure (CTree DistRef) +resolveCTree e = CTree.traverseCTree (resolveRef e) + +buildResolvedCTree :: + CTreeRoot OrRef -> + Either NameResolutionFailure (CTreeRoot DistRef) +buildResolvedCTree (CTreeRoot ct) = CTreeRoot <$> traverse go ct + where + initBindingEnv = BindingEnv ct mempty + go pn = + let args = parameters pn + localBinds = Map.fromList $ zip args (flip Ref [] <$> args) + env = initBindingEnv & field @"local" %~ Map.union localBinds + in traverse (resolveRef env) pn + +-------------------------------------------------------------------------------- +-- 4. Monomorphisation +-------------------------------------------------------------------------------- + +data MonoRef a + = MIt a + | MRuleRef Name + deriving (Functor, Show) + +deriving instance Show (CTree MonoRef) + +deriving instance + (Show (poly (CTree.Node MonoRef))) => + Show (CTreeRoot' poly MonoRef) + +type MonoEnv = BindingEnv (ParametrisedWith [Name]) DistRef + +-- | We introduce additional bindings in the state +type MonoState = Map.Map Name (CTree.Node MonoRef) + +-- | Monad to run the monomorphisation process. We need some additional +-- capabilities for this, so 'Either' doesn't fully cut it anymore. +newtype MonoM a = MonoM + { runMonoM :: + ExceptT + NameResolutionFailure + (StateT MonoState (Reader MonoEnv)) + a + } + deriving (Functor, Applicative, Monad) + deriving + (HasThrow "nameResolution" NameResolutionFailure) + via MonadError + ( ExceptT + NameResolutionFailure + (StateT MonoState (Reader MonoEnv)) + ) + deriving + ( HasSource + "local" + (Map.Map Name (CTree.Node DistRef)), + HasReader + "local" + (Map.Map Name (CTree.Node DistRef)) + ) + via Field + "local" + () + ( Lift + ( ExceptT + NameResolutionFailure + (Lift (StateT MonoState (MonadReader (Reader MonoEnv)))) + ) + ) + deriving + ( HasSource + "global" + (Map.Map Name (ParametrisedWith [Name] (CTree.Node DistRef))), + HasReader + "global" + (Map.Map Name (ParametrisedWith [Name] (CTree.Node DistRef))) + ) + via Field + "global" + () + ( Lift + ( ExceptT + NameResolutionFailure + (Lift (StateT MonoState (MonadReader (Reader MonoEnv)))) + ) + ) + deriving + ( HasSource "synth" MonoState, + HasSink "synth" MonoState, + HasState "synth" MonoState + ) + via Lift + ( ExceptT + NameResolutionFailure + (MonadState (StateT MonoState (Reader MonoEnv))) + ) + +throwNR :: NameResolutionFailure -> MonoM a +throwNR = throw @"nameResolution" + +-- | Synthesize a monomorphic rule definition, returning the name +synthMono :: Name -> [CTree.Node DistRef] -> MonoM Name +synthMono n@(Name origName) args = + let fresh = + -- % is not a valid CBOR name, so this should avoid conflict + Name (origName <> "%" <> T.pack (show $ hash args)) + in do + -- Lookup the original name in the global bindings + globalBinds <- ask @"global" + case Map.lookup n globalBinds of + Just (Unparametrised _) -> throwNR $ MismatchingArgs n [] + Just (Parametrised r params) -> + if length params == length args + then + let localBinds = Map.fromList $ zip params args + in local @"local" (Map.union localBinds) $ do + foo <- resolveGenericRef r + modify @"synth" $ Map.insert fresh foo + else throwNR $ MismatchingArgs n params + Nothing -> throwNR $ UnboundReference n + pure fresh + +resolveGenericRef :: + CTree.Node DistRef -> + MonoM (MonoRef (CTree MonoRef)) +resolveGenericRef (DIt a) = MIt <$> resolveGenericCTree a +resolveGenericRef (RuleRef n margs) = + case margs of + [] -> pure $ MRuleRef n + args -> do + fresh <- synthMono n args + pure $ MRuleRef fresh +resolveGenericRef (GenericRef n) = do + localBinds <- ask @"local" + case Map.lookup n localBinds of + Just node -> resolveGenericRef node + Nothing -> throwNR $ UnboundReference n + +resolveGenericCTree :: + CTree DistRef -> + MonoM (CTree MonoRef) +resolveGenericCTree = CTree.traverseCTree resolveGenericRef + +-- | Monomorphise the CTree +-- +-- Concretely, for each reference in the tree to a generic rule, we synthesize a +-- new monomorphic instance of that rule at top-level with the correct +-- parameters applied. +monoCTree :: + CTreeRoot' Identity DistRef -> + MonoM (CTreeRoot' Identity MonoRef) +monoCTree (CTreeRoot ct) = CTreeRoot <$> traverse go ct + where + go = traverse resolveGenericRef + +buildMonoCTree :: + CTreeRoot DistRef -> + Either NameResolutionFailure (CTreeRoot' Identity MonoRef) +buildMonoCTree (CTreeRoot ct) = do + let a1 = runExceptT (monoCTree monoC).runMonoM + a2 = runStateT a1 mempty + (er, newBindings) = runReader a2 initBindingEnv + CTreeRoot r <- er + pure . CTreeRoot $ Map.union r $ fmap Identity newBindings + where + initBindingEnv = BindingEnv ct mempty + monoC = + CTreeRoot $ + Map.mapMaybe + ( \case + Unparametrised f -> Just $ Identity f + Parametrised _ _ -> Nothing + ) + ct From 161347b91ff705c094e7eac16025fb23f71e8667 Mon Sep 17 00:00:00 2001 From: Nicholas Clarke Date: Thu, 4 Jan 2024 11:40:48 +0100 Subject: [PATCH 5/5] Implement CBOR Generation. This commit adds support for generating CBOR terms corresponding to a given CDDL file. Needs better support for errors, and not all CDDL features are yet supported. --- cuddle.cabal | 1 + example/Main.hs | 15 ++ src/Codec/CBOR/Cuddle/CBOR/Gen.hs | 348 ++++++++++++++++++++++++++++++ 3 files changed, 364 insertions(+) create mode 100644 src/Codec/CBOR/Cuddle/CBOR/Gen.hs diff --git a/cuddle.cabal b/cuddle.cabal index 476a068..5858ba0 100644 --- a/cuddle.cabal +++ b/cuddle.cabal @@ -23,6 +23,7 @@ library import: warnings exposed-modules: Codec.CBOR.Cuddle.Builder + Codec.CBOR.Cuddle.CBOR.Gen Codec.CBOR.Cuddle.CDDL Codec.CBOR.Cuddle.CDDL.CtlOp Codec.CBOR.Cuddle.CDDL.CTree diff --git a/example/Main.hs b/example/Main.hs index b76644f..1976085 100644 --- a/example/Main.hs +++ b/example/Main.hs @@ -3,6 +3,7 @@ module Main (main) where +import Codec.CBOR.Cuddle.CBOR.Gen (generateCBORTerm) import Codec.CBOR.Cuddle.CDDL (Name (..)) import Codec.CBOR.Cuddle.CDDL.Resolve (asMap, buildMonoCTree, buildRefCTree, buildResolvedCTree) import Codec.CBOR.Cuddle.Parser (pCDDL) @@ -12,6 +13,7 @@ import Data.Text.IO qualified as T import Prettyprinter (Pretty (pretty)) import Prettyprinter.Util (putDocW) import System.Environment (getArgs) +import System.Random (getStdGen) import Text.Megaparsec (ParseErrorBundle, Parsec, errorBundlePretty, runParser) main :: IO () @@ -40,6 +42,19 @@ main = do putStrLn "--------------------------------------------------------------------------------" let monoCTree = buildMonoCTree <$> resolvedCTree print monoCTree + [fn, name] -> do + putStrLn "--------------------------------------------------------------------------------" + putStrLn " Generating a term" + putStrLn "--------------------------------------------------------------------------------" + parseFromFile pCDDL fn >>= \case + Left err -> putStrLn $ errorBundlePretty err + Right res -> do + stdGen <- getStdGen + case buildMonoCTree =<< buildResolvedCTree (buildRefCTree (asMap res)) of + Left nre -> error $ show nre + Right mt -> + let term = generateCBORTerm mt (Name $ T.pack name) stdGen + in print term _ -> putStrLn "Expected filename" parseFromFile :: diff --git a/src/Codec/CBOR/Cuddle/CBOR/Gen.hs b/src/Codec/CBOR/Cuddle/CBOR/Gen.hs new file mode 100644 index 0000000..9ef8e67 --- /dev/null +++ b/src/Codec/CBOR/Cuddle/CBOR/Gen.hs @@ -0,0 +1,348 @@ +{-# LANGUAGE AllowAmbiguousTypes #-} +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE DerivingVia #-} +{-# LANGUAGE GADTs #-} +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE PatternSynonyms #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE TypeApplications #-} +{-# LANGUAGE ViewPatterns #-} +{-# LANGUAGE NoFieldSelectors #-} +{-# OPTIONS_GHC -Wno-unticked-promoted-constructors #-} + +-- | Generate example CBOR given a CDDL specification +module Codec.CBOR.Cuddle.CBOR.Gen where + +import Capability.Reader +import Capability.Sink (HasSink) +import Capability.Source (HasSource, MonadState (..)) +import Capability.State (HasState, state) +import Codec.CBOR.Cuddle.CDDL + ( Name (..), + OccurrenceIndicator (..), + Value (..), + ) +import Codec.CBOR.Cuddle.CDDL.CTree (CTree, CTreeRoot' (..)) +import Codec.CBOR.Cuddle.CDDL.CTree qualified as CTree +import Codec.CBOR.Cuddle.CDDL.Postlude (PTerm (..)) +import Codec.CBOR.Cuddle.CDDL.Resolve (MonoRef (..)) +import Codec.CBOR.Term (Term (..)) +import Control.Monad (replicateM, (<=<)) +import Control.Monad.Reader (Reader, runReader) +import Control.Monad.State.Strict (StateT, runStateT) +import Data.ByteString.Short qualified as BS +import Data.Functor ((<&>)) +import Data.Functor.Identity (Identity (runIdentity)) +import Data.List.NonEmpty qualified as NE +import Data.Map.Strict qualified as Map +import Data.Maybe (fromMaybe) +import GHC.Generics (Generic) +import System.Random.Stateful + ( Random, + RandomGen (genShortByteString, genWord32, genWord64), + RandomGenM, + StatefulGen (..), + StdGen, + UniformRange (uniformRM), + applyRandomGenM, + randomM, + ) + +-------------------------------------------------------------------------------- +-- Generator infrastructure +-------------------------------------------------------------------------------- + +type TypeMap = Map.Map Name (Gen Term) + +-- | Generator context, parametrised over the type of the random seed +data GenEnv g = GenEnv + { cddl :: CTreeRoot' Identity MonoRef, + -- | Access the "fake" seed, necessary to recursively call generators + fakeSeed :: CapGenM g + } + deriving (Generic) + +newtype GenState g = GenState + { -- | Actual seed + randomSeed :: g + } + deriving (Generic) + +newtype M g a = M {runM :: StateT (GenState g) (Reader (GenEnv g)) a} + deriving (Functor, Applicative, Monad) + deriving + (HasSource "randomSeed" g, HasSink "randomSeed" g, HasState "randomSeed" g) + via Field + "randomSeed" + () + (MonadState (StateT (GenState g) (Reader (GenEnv g)))) + deriving + ( HasSource "cddl" (CTreeRoot' Identity MonoRef), + HasReader "cddl" (CTreeRoot' Identity MonoRef) + ) + via Field + "cddl" + () + (Lift (StateT (GenState g) (MonadReader (Reader (GenEnv g))))) + deriving + (HasSource "fakeSeed" (CapGenM g), HasReader "fakeSeed" (CapGenM g)) + via Field + "fakeSeed" + () + (Lift (StateT (GenState g) (MonadReader (Reader (GenEnv g))))) + +-- | Opaque type carrying the type of a pure PRNG inside a capability-style +-- state monad. +data CapGenM g = CapGenM + +instance (RandomGen g) => StatefulGen (CapGenM g) (M g) where + uniformWord64 _ = state @"randomSeed" genWord64 + uniformWord32 _ = state @"randomSeed" genWord32 + + uniformShortByteString n _ = state @"randomSeed" (genShortByteString n) + +instance (RandomGen r) => RandomGenM (CapGenM r) r (M r) where + applyRandomGenM f _ = state @"randomSeed" f + +type Gen = M StdGen + +runGen :: M g a -> GenEnv g -> GenState g -> (a, GenState g) +runGen (M m) env st = runReader (runStateT m st) env + +evalGen :: M g a -> GenEnv g -> GenState g -> a +evalGen m env = fst . runGen m env + +asksM :: forall tag r m a. (HasReader tag r m) => (r -> m a) -> m a +asksM f = f =<< ask @tag + +-------------------------------------------------------------------------------- +-- Wrappers around some Random function in Gen +-------------------------------------------------------------------------------- + +genUniformRM :: forall a g. (UniformRange a, RandomGen g) => (a, a) -> M g a +genUniformRM = asksM @"fakeSeed" . uniformRM + +genRandomM :: forall g a. (Random a, RandomGen g) => M g a +genRandomM = asksM @"fakeSeed" randomM + +-------------------------------------------------------------------------------- +-- Combinators +-------------------------------------------------------------------------------- + +choose :: [a] -> Gen a +choose xs = genUniformRM (0, length xs) >>= \i -> pure $ xs !! i + +oneOf :: [Gen a] -> Gen a +oneOf xs = genUniformRM (0, length xs) >>= \i -> xs !! i + +oneOfGenerated :: Gen [a] -> Gen a +oneOfGenerated genXs = genXs >>= choose + +-------------------------------------------------------------------------------- +-- Postlude +-------------------------------------------------------------------------------- + +-- | Primitive types defined by the CDDL specification, with their generators +genPostlude :: PTerm -> Gen Term +genPostlude pt = case pt of + PTBool -> + genRandomM + <&> TBool + PTUInt -> + genUniformRM (minBound :: Word, maxBound) + <&> TInteger + . fromIntegral + PTNInt -> + genUniformRM + (minBound :: Int, 0) + <&> TInteger + . fromIntegral + PTInt -> + genUniformRM (minBound :: Int, maxBound) + <&> TInteger + . fromIntegral + PTHalf -> + genUniformRM (-65504, 65504) + <&> THalf + PTFloat -> + genRandomM + <&> TFloat + PTDouble -> + genRandomM + <&> TDouble + PTBytes -> + TBytes . BS.fromShort + <$> asksM @"fakeSeed" (uniformShortByteString 30) + PTText -> + pure $ TString "The quick black horse jumped over the lazy dog" + PTAny -> + pure $ TString "Any" + PTNil -> pure TNull + +-------------------------------------------------------------------------------- +-- Kinds of terms +-------------------------------------------------------------------------------- + +data WrappedTerm + = SingleTerm Term + | PairTerm Term Term + | GroupTerm [WrappedTerm] + deriving (Eq, Show) + +-- | Recursively flatten wrapped list. That is, expand any groups out to their +-- individual entries. +flattenWrappedList :: [WrappedTerm] -> [WrappedTerm] +flattenWrappedList [] = [] +flattenWrappedList (GroupTerm xxs : xs) = + flattenWrappedList xxs <> flattenWrappedList xs +flattenWrappedList (y : xs) = y : flattenWrappedList xs + +pattern S :: Term -> WrappedTerm +pattern S t = SingleTerm t + +-- | Convert a list of wrapped terms to a list of terms. If any 'PairTerm's are +-- present, we just take their "value" part. +singleTermList :: [WrappedTerm] -> Maybe [Term] +singleTermList [] = Just [] +singleTermList (S x : xs) = (x :) <$> singleTermList xs +singleTermList (P _ y : xs) = (y :) <$> singleTermList xs +singleTermList _ = Nothing + +pattern P :: Term -> Term -> WrappedTerm +pattern P t1 t2 = PairTerm t1 t2 + +-- | Convert a list of wrapped terms to a list of pairs of terms, or fail if any +-- 'SingleTerm's are present. +pairTermList :: [WrappedTerm] -> Maybe [(Term, Term)] +pairTermList [] = Just [] +pairTermList (P x y : xs) = ((x, y) :) <$> pairTermList xs +pairTermList _ = Nothing + +pattern G :: [WrappedTerm] -> WrappedTerm +pattern G xs = GroupTerm xs + +-------------------------------------------------------------------------------- +-- Generator functions +-------------------------------------------------------------------------------- + +genForCTree :: CTree MonoRef -> Gen WrappedTerm +genForCTree (CTree.Literal v) = S <$> genValue v +genForCTree (CTree.Postlude pt) = S <$> genPostlude pt +genForCTree (CTree.Map nodes) = do + items <- pairTermList . flattenWrappedList <$> traverse genForNode nodes + case items of + Just ts -> pure . S $ TMap ts + Nothing -> error "Single terms in map context" +genForCTree (CTree.Array nodes) = do + items <- singleTermList . flattenWrappedList <$> traverse genForNode nodes + case items of + Just ts -> pure . S $ TList ts + Nothing -> error "Something weird happened which shouldn't be possible" +genForCTree (CTree.Choice (NE.toList -> nodes)) = do + ix <- genUniformRM (0, length nodes - 1) + genForNode $ nodes !! ix +genForCTree (CTree.Group nodes) = G <$> traverse genForNode nodes +genForCTree (CTree.KV key value _cut) = do + kg <- genForNode key + vg <- genForNode value + case (kg, vg) of + (S k, S v) -> pure $ P k v + _ -> + error $ + "Non single-term generated outside of group context: " + <> show key + <> " => " + <> show value +genForCTree (CTree.Occur item occurs) = + applyOccurenceIndicator occurs (genForNode item) +genForCTree (CTree.Range from to _bounds) = do + -- TODO Handle bounds correctly + term1 <- genForNode from + term2 <- genForNode to + case (term1, term2) of + (S (TInteger a), S (TInteger b)) -> genUniformRM (a, b) <&> S . TInteger + (S (THalf a), S (THalf b)) -> genUniformRM (a, b) <&> S . THalf + (S (TFloat a), S (TFloat b)) -> genUniformRM (a, b) <&> S . TFloat + (S (TDouble a), S (TDouble b)) -> genUniformRM (a, b) <&> S . TDouble + _ -> error "Cannot apply range operator to non-numeric types" +genForCTree (CTree.Control _op target _controller) = + -- TODO Handle control operators + genForNode target +genForCTree (CTree.Enum node) = do + tree <- resolveIfRef node + case tree of + CTree.Group nodes -> do + ix <- genUniformRM (0, length nodes) + genForNode $ nodes !! ix + _ -> error "Attempt to form an enum from something other than a group" +genForCTree (CTree.Unwrap node) = genForCTree =<< resolveIfRef node + +genForNode :: CTree.Node MonoRef -> Gen WrappedTerm +genForNode = genForCTree <=< resolveIfRef + +-- | Take something which might be a reference and resolve it to the relevant +-- Tree, following multiple links if necessary. +resolveIfRef :: CTree.Node MonoRef -> Gen (CTree MonoRef) +resolveIfRef (MIt a) = pure a +resolveIfRef (MRuleRef n) = do + (CTreeRoot cddl) <- ask @"cddl" + case Map.lookup n cddl of + Nothing -> error "Unbound reference" + Just val -> resolveIfRef $ runIdentity val + +-- | Generate a CBOR Term corresponding to a top-level name. +-- +-- Since we apply this to a monomorphised CTree, the names must be monomorphic +-- terms in the original CDDL. +-- +-- This will throw an error if the generated item does not correspond to a +-- single CBOR term (e.g. if the name resolves to a group, which cannot be +-- generated outside a context). +genForName :: Name -> Gen Term +genForName n = do + (CTreeRoot cddl) <- ask @"cddl" + case Map.lookup n cddl of + Nothing -> error "Unbound reference" + Just val -> + genForNode (runIdentity val) >>= \case + S x -> pure x + _ -> + error $ + "Tried to generate a top-level term for " + <> show n + <> ", but it does not correspond to a single term." + +-- | Apply an occurence indicator to a group entry +applyOccurenceIndicator :: + OccurrenceIndicator -> + Gen WrappedTerm -> + Gen WrappedTerm +applyOccurenceIndicator OIOptional oldGen = + genRandomM >>= \case + False -> pure $ G mempty + True -> oldGen +applyOccurenceIndicator OIZeroOrMore oldGen = + genUniformRM (0 :: Int, 10) >>= \i -> + G <$> replicateM i oldGen +applyOccurenceIndicator OIOneOrMore oldGen = + genUniformRM (0 :: Int, 10) >>= \i -> + G <$> replicateM i oldGen +applyOccurenceIndicator (OIBounded mlb mub) oldGen = + genUniformRM (fromMaybe 0 mlb :: Int, fromMaybe 10 mub) + >>= \i -> G <$> replicateM i oldGen + +genValue :: Value -> Gen Term +genValue (VNum i) = pure . TInteger $ fromIntegral i +genValue (VText t) = pure $ TString t +genValue (VBytes b) = pure $ TBytes b + +-------------------------------------------------------------------------------- +-- Generator functions +-------------------------------------------------------------------------------- + +generateCBORTerm :: CTreeRoot' Identity MonoRef -> Name -> StdGen -> Term +generateCBORTerm cddl n stdGen = + let genEnv = GenEnv {cddl, fakeSeed = CapGenM} + genState = GenState {randomSeed = stdGen} + in evalGen (genForName n) genEnv genState