From e22da4fcbbe7012e41d2bac62a20761fae53a463 Mon Sep 17 00:00:00 2001 From: kwxm Date: Sun, 7 Apr 2019 23:41:21 +0100 Subject: [PATCH 1/4] Improve generator; allow printer to include or ignore Uniques in Vars --- boolean/TinyLang/Boolean/Core.hs | 6 ++--- boolean/TinyLang/Boolean/Parser.hs | 2 +- boolean/TinyLang/Boolean/Printer.hs | 40 +++++++++++++++++++++-------- tiny-lang.cabal | 2 ++ 4 files changed, 35 insertions(+), 15 deletions(-) diff --git a/boolean/TinyLang/Boolean/Core.hs b/boolean/TinyLang/Boolean/Core.hs index a2a3850..63d5f8d 100644 --- a/boolean/TinyLang/Boolean/Core.hs +++ b/boolean/TinyLang/Boolean/Core.hs @@ -9,13 +9,13 @@ import TinyLang.Var data UnOp = Not - deriving (Show, Read, Generic) + deriving (Show, Read, Generic, Eq) data BinOp = Or | And | Xor - deriving (Show, Read, Generic) + deriving (Show, Read, Generic, Eq) -- TODO: -- 1. pretty-printing @@ -28,7 +28,7 @@ data Expr | EIf Expr Expr Expr | EAppUnOp UnOp Expr | EAppBinOp BinOp Expr Expr - deriving (Show, Generic) + deriving (Show, Generic, Eq) instance Monad m => Serial m UnOp instance Monad m => Serial m BinOp diff --git a/boolean/TinyLang/Boolean/Parser.hs b/boolean/TinyLang/Boolean/Parser.hs index d61de6b..abe7463 100644 --- a/boolean/TinyLang/Boolean/Parser.hs +++ b/boolean/TinyLang/Boolean/Parser.hs @@ -118,7 +118,7 @@ expr1 = valExpr <|> varExpr <|> parens expr -- expr: full expressions expr :: Parser Expr -expr = ifExpr <|> operExpr +expr = operExpr <|> ifExpr -- operExpr: expressions involving unary and binary operators operExpr :: Parser Expr diff --git a/boolean/TinyLang/Boolean/Printer.hs b/boolean/TinyLang/Boolean/Printer.hs index a8d379e..0abc6d7 100644 --- a/boolean/TinyLang/Boolean/Printer.hs +++ b/boolean/TinyLang/Boolean/Printer.hs @@ -1,11 +1,21 @@ -module TinyLang.Boolean.Printer (toString) -where +module TinyLang.Boolean.Printer + (toStringWithIDs, + toStringNoIDs + ) where import TinyLang.Boolean.Core import TinyLang.Boolean.Environment (lookupVar) import TinyLang.Var --- | TODO: make it configurable whether printed variable names include their Unique serial number +-- | Variable names are equipped with Unique identifiers. The +-- PrintStyle type determines whether printed variable names include +-- these or not ("x_5" versus "x"). If we're going to re-parse the +-- output of toString we probably don't want the IDs. +data PrintStyle = WithIDs | NoIDs + +toStringVar :: PrintStyle -> Var -> String +toStringVar NoIDs (Var _ name) = name +toStringVar WithIDs v = show v -- or explicitly tell it what to do? toStringUnOp :: UnOp -> String toStringUnOp Not = "not " @@ -22,14 +32,22 @@ isSimple (EVar _) = True isSimple _ = False -- Convert to string (with enclosing () if necessary) -toString1 :: Expr -> String -toString1 e = if isSimple e then toString e else "(" ++ toString e ++ ")" +toString1 :: PrintStyle -> Expr -> String +toString1 s e = if isSimple e then toString s e else "(" ++ toString s e ++ ")" -- Main function -toString :: Expr -> String -toString (EVal b) = if b then "T" else "F" -toString (EVar v) = _varName v -toString (EIf e e1 e2) = "if " ++ toString1 e ++ " then " ++ toString1 e1 ++ " else " ++ toString1 e2 -toString (EAppUnOp op e) = toStringUnOp op ++ toString1 e -toString (EAppBinOp op e1 e2) = toString1 e1 ++ toStringBinOp op ++ toString1 e2 +toString :: PrintStyle -> Expr -> String +toString s (EVal b) = if b then "T" else "F" +toString s (EVar v) = toStringVar s v +toString s (EAppUnOp op e) = toStringUnOp op ++ toString1 s e +toString s (EAppBinOp op e1 e2) = toString1 s e1 ++ toStringBinOp op ++ toString1 s e2 +toString s (EIf e e1 e2) = "if " ++ toString1 s e ++ " then " ++ toString1 s e1 ++ " else " ++ toString1 s e2 + + +-- | Convert an Expr to a String, ignoring Unique IDs in variable names +toStringNoIDs :: Expr -> String +toStringNoIDs = toString NoIDs +-- | Convert an Expr to a String, including Unique IDs in variable names +toStringWithIDs :: Expr -> String +toStringWithIDs = toString WithIDs diff --git a/tiny-lang.cabal b/tiny-lang.cabal index 5a8212f..85e9d64 100644 --- a/tiny-lang.cabal +++ b/tiny-lang.cabal @@ -20,11 +20,13 @@ library TinyLang.Boolean.Core TinyLang.Boolean.Evaluator TinyLang.Boolean.Environment + TinyLang.Boolean.Generator TinyLang.Boolean.Parser TinyLang.Boolean.Printer build-depends: base >= 4.7 && < 5, megaparsec, parser-combinators, + QuickCheck, mtl, containers, smallcheck From 1ccf938bc328ea6ed71a64fefff0c157e4f7b0a9 Mon Sep 17 00:00:00 2001 From: kwxm Date: Mon, 8 Apr 2019 00:07:25 +0100 Subject: [PATCH 2/4] Forgot to add the generator source --- boolean/TinyLang/Boolean/Generator.hs | 152 ++++++++++++++++++++++++++ 1 file changed, 152 insertions(+) create mode 100644 boolean/TinyLang/Boolean/Generator.hs diff --git a/boolean/TinyLang/Boolean/Generator.hs b/boolean/TinyLang/Boolean/Generator.hs new file mode 100644 index 0000000..8681fe1 --- /dev/null +++ b/boolean/TinyLang/Boolean/Generator.hs @@ -0,0 +1,152 @@ +module TinyLang.Boolean.Generator + ( + makeVars, + defaultVars, + boundedAbritraryExpr, + defaultArbitraryExpr, + prop_checkparse + ) where + +import Control.Monad +import Test.QuickCheck +import TinyLang.Boolean.Core +import TinyLang.Boolean.Parser +import TinyLang.Boolean.Printer +import TinyLang.Var + + +{-| + We can't use String as the type of variable names in generators for + expressions, because then it's probable that the variables occurring + in a randomly generated expression will all be distinct, which is + unrealistic. Also we have to conform to the concrete syntax, and + deal sensibly with Unique IDs in some way. + + To deal with these issues we parameterise the expression generator + over a list of Vars, and the variables appearing in the expression + will be chosen uniformly from this list using QuickCheck's + 'elements'. It's OK to have repeated variables: the more often a + variable appears in the list, the more often it's likely to appear + in a random expression. + + Variable names should be of the form [a-z][a-z0-9]* if they're going + to be printed and fed to the parser. +-} + +type VarName = String + +-- | A convenience method to convert a list of Strings into a list of +-- Vars. The variables are given unique serial numbers 1,2,3,..., +-- which means that multiple occurrences of the same name will yield +-- different Vars: this may or may not be what you want. +makeVars :: [VarName] -> [Var] +makeVars = zipWith (\index name -> Var (Unique index) name) [1..] + +-- | A list of default variables for convenience. +defaultVars :: [Var] +defaultVars = makeVars ["a", "b", "c", "d", "e", "f", "g", "h"] + +-- | Default depth bound for generated expressions. +defaultDepth :: Int +defaultDepth = 6 + +-- | Generator for values +arbitraryValue :: Gen Bool +arbitraryValue = arbitrary + +-- | Generator for variables, choosing from the given list. +arbitraryVar :: [Var] -> Gen Var +arbitraryVar = elements + +-- | Generator for unary operators. We only have one at the moment, +-- so this is kind of trivial: it's easily extensible though. +arbitraryUnOp :: Gen UnOp +arbitraryUnOp = elements [Not] + +-- | Generator for binary operators. +arbitraryBinOp :: Gen BinOp +arbitraryBinOp = elements [And, Xor, Or] + +-- Generator for atoms (expressions with no subexpressions): EVal or EVar. +arbitraryAtom :: [Var] -> Gen Expr +arbitraryAtom vars = oneof [liftM EVal arbitraryValue, liftM EVar (arbitraryVar vars)] + + +{-| Generate an arbirary expression of maximum depth d. If we use + 'elements' to choose uniformly from the five constructors of Expr + then 40% of the time we get an atom so the trees aren't very deep + (indeed, 40% of the time they only have a single node). We use + 'frequency' instead, and the chosen frequencies give us an atom + twice out of every 17 atttempts, which make the trees quite deep + and bushy. + + Depending on the use case, it might be worth adjusting the frequencies. + We might also try to bond the number of nodes, rather than the depth. +-} +-- | NOTE: we should probably use 'sized' here: it'll help QuickCheck to search for counterexamples. +boundedAbritraryExpr :: [Var] -> Int -> Gen Expr +boundedAbritraryExpr vars depth = + if depth == 0 then + arbitraryAtom vars + else + frequency [ (1, EVal <$> arbitraryValue) + , (1, EVar <$> arbitraryVar vars) + , (5, EAppUnOp <$> arbitraryUnOp <*> subexpr) + , (5, EAppBinOp <$> arbitraryBinOp <*> subexpr <*> subexpr) + , (5, EIf <$> subexpr <*> subexpr <*> subexpr) + ] + where subexpr = boundedAbritraryExpr vars (depth-1) + + +-- | A default generator: variables a..h, depth 10 +defaultArbitraryExpr :: Gen Expr +defaultArbitraryExpr = boundedAbritraryExpr defaultVars defaultDepth + +{- A simple shrinker. If we get a failing example then it just tries + all of the subexpressions. This will produce a minimal example if, + for example we do something like printing with Unique IDs but + parsing without them (because our test compares names but not IDs, + so we'll be getting things like "b_2" == "b"). If this isn't good + enough, we could also try things like replacing individual leaves + with atoms in failing cases. +-} +shrinkExpr :: Expr -> [Expr] +shrinkExpr (EAppUnOp op e) = [e] +shrinkExpr (EAppBinOp op e1 e2) = [e1, e2] +shrinkExpr (EIf e e1 e2) = [e,e1,e2] +shrinkExpr (EVal _) = [] -- Can't shrink an atom +shrinkExpr (EVar _) = [] + + +-- Do we really want to define an instance here? I couldn't +-- immediately see any other way to run quickCheck with a specified +-- generator and shrinker, but I probably didn't look closely enough. +instance Arbitrary Expr + where arbitrary = defaultArbitraryExpr + shrink = shrinkExpr + +--------------------------------------------------------------------------- +-- Since we've got a generator, let's use it to test the parser and printer. + +-- We want to check that printing then parsing is the identity, but in +-- general it won't be because the Uniques in the variables will change. +-- Let's get round that by setting all the IDs to 0. +-- I'm sure there's a fancy Haskell way to do this, but it's not hard to +-- do it the old-fashioned way. + +forgetID :: Var -> Var +forgetID v = Var (Unique 0) (_varName v) + +forgetIDs :: Expr -> Expr +forgetIDs (EVal b) = EVal b +forgetIDs (EVar v) = EVar (forgetID v) +forgetIDs (EAppUnOp op e) = EAppUnOp op (forgetIDs e) +forgetIDs (EAppBinOp op e1 e2) = EAppBinOp op (forgetIDs e1) (forgetIDs e2) +forgetIDs (EIf e e1 e2) = EIf (forgetIDs e) (forgetIDs e1) (forgetIDs e2) + +prop_checkparse e = let r = parseExpr (toStringWithIDs e) + in case r of + Left _ -> False + Right f -> forgetIDs f == forgetIDs e + + From 91e445d1603e7d73956e29b50bc392ef16cac4d1 Mon Sep 17 00:00:00 2001 From: kwxm Date: Mon, 8 Apr 2019 00:08:41 +0100 Subject: [PATCH 3/4] Remove intentional failure in test --- boolean/TinyLang/Boolean/Generator.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/boolean/TinyLang/Boolean/Generator.hs b/boolean/TinyLang/Boolean/Generator.hs index 8681fe1..27ce49f 100644 --- a/boolean/TinyLang/Boolean/Generator.hs +++ b/boolean/TinyLang/Boolean/Generator.hs @@ -144,7 +144,7 @@ forgetIDs (EAppUnOp op e) = EAppUnOp op (forgetIDs e) forgetIDs (EAppBinOp op e1 e2) = EAppBinOp op (forgetIDs e1) (forgetIDs e2) forgetIDs (EIf e e1 e2) = EIf (forgetIDs e) (forgetIDs e1) (forgetIDs e2) -prop_checkparse e = let r = parseExpr (toStringWithIDs e) +prop_checkparse e = let r = parseExpr (toStringNoIDs e) in case r of Left _ -> False Right f -> forgetIDs f == forgetIDs e From 58ac4d3945b99b48ebe362800fc724ed9fe43c6a Mon Sep 17 00:00:00 2001 From: kwxm Date: Mon, 8 Apr 2019 19:28:32 +0100 Subject: [PATCH 4/4] Improved generator --- boolean/TinyLang/Boolean/Generator.hs | 114 +++++++++++++++++--------- 1 file changed, 77 insertions(+), 37 deletions(-) diff --git a/boolean/TinyLang/Boolean/Generator.hs b/boolean/TinyLang/Boolean/Generator.hs index 27ce49f..6c4a521 100644 --- a/boolean/TinyLang/Boolean/Generator.hs +++ b/boolean/TinyLang/Boolean/Generator.hs @@ -14,10 +14,9 @@ import TinyLang.Boolean.Parser import TinyLang.Boolean.Printer import TinyLang.Var - {-| We can't use String as the type of variable names in generators for - expressions, because then it's probable that the variables occurring + expressions because then it's probable that the variables occurring in a randomly generated expression will all be distinct, which is unrealistic. Also we have to conform to the concrete syntax, and deal sensibly with Unique IDs in some way. @@ -27,7 +26,8 @@ import TinyLang.Var will be chosen uniformly from this list using QuickCheck's 'elements'. It's OK to have repeated variables: the more often a variable appears in the list, the more often it's likely to appear - in a random expression. + in a random expression (but note that repeated Vars should be exactly + the same, including the Uniqe ID). Variable names should be of the form [a-z][a-z0-9]* if they're going to be printed and fed to the parser. @@ -46,10 +46,6 @@ makeVars = zipWith (\index name -> Var (Unique index) name) [1..] defaultVars :: [Var] defaultVars = makeVars ["a", "b", "c", "d", "e", "f", "g", "h"] --- | Default depth bound for generated expressions. -defaultDepth :: Int -defaultDepth = 6 - -- | Generator for values arbitraryValue :: Gen Bool arbitraryValue = arbitrary @@ -67,48 +63,72 @@ arbitraryUnOp = elements [Not] arbitraryBinOp :: Gen BinOp arbitraryBinOp = elements [And, Xor, Or] --- Generator for atoms (expressions with no subexpressions): EVal or EVar. +-- | Generator for atoms (expressions with no subexpressions): EVal or EVar. arbitraryAtom :: [Var] -> Gen Expr -arbitraryAtom vars = oneof [liftM EVal arbitraryValue, liftM EVar (arbitraryVar vars)] +arbitraryAtom vars = oneof [EVal <$> arbitraryValue, EVar <$> (arbitraryVar vars)] -{-| Generate an arbirary expression of maximum depth d. If we use - 'elements' to choose uniformly from the five constructors of Expr - then 40% of the time we get an atom so the trees aren't very deep - (indeed, 40% of the time they only have a single node). We use - 'frequency' instead, and the chosen frequencies give us an atom - twice out of every 17 atttempts, which make the trees quite deep - and bushy. +{-| Generate an arbiratry expression of maximum size 'size' containing + the variables supplied in 'vars'. + + If we use 'elements' to choose uniformly from the five constructors of + Expr then 40% of the time we get an atom so the trees aren't very + deep (indeed, 40% of the time they only have a single node). We + use 'frequency' instead, and the chosen frequencies give us an + atom twice out of every 17 atttempts: this makes the trees quite + deep and bushy. Depending on the use case, it might be worth adjusting the frequencies. - We might also try to bond the number of nodes, rather than the depth. + + Note that the 'size' parameter is an upper bound on the number of + nodes, not the depth of the tree. This works better with + QuickCheck's handling of sizes. For testing QuickCheck uses a + default size of 100, and generating trees of depth 100 would be + excessive. + + You probably won't get up to the size bound most of the time since + that would require almost all of the nodes to be non-atoms. Some + quick tests suggest that if you supply a bound of 100,000 then + samples are often of size 10000-35000 but seldom larger, so we're + at least getting the right order of magnitude. + + To change the size while testing you can do things like + + quickCheckWith stdArgs {maxSize=10000} prop_checkparse + + See the QuickCheck documentation for the 'Args' type for + more information. + -} --- | NOTE: we should probably use 'sized' here: it'll help QuickCheck to search for counterexamples. + boundedAbritraryExpr :: [Var] -> Int -> Gen Expr -boundedAbritraryExpr vars depth = - if depth == 0 then +boundedAbritraryExpr vars size = + if size <= 1 then arbitraryAtom vars else frequency [ (1, EVal <$> arbitraryValue) , (1, EVar <$> arbitraryVar vars) - , (5, EAppUnOp <$> arbitraryUnOp <*> subexpr) - , (5, EAppBinOp <$> arbitraryBinOp <*> subexpr <*> subexpr) - , (5, EIf <$> subexpr <*> subexpr <*> subexpr) + , (5, EAppUnOp <$> arbitraryUnOp <*> subexpr1) + , (5, EAppBinOp <$> arbitraryBinOp <*> subexpr2 <*> subexpr2) + , (5, EIf <$> subexpr3 <*> subexpr3 <*> subexpr3) ] - where subexpr = boundedAbritraryExpr vars (depth-1) + where subexpr1 = boundedAbritraryExpr vars (size-1) + subexpr2 = boundedAbritraryExpr vars (size `div` 2) + subexpr3 = boundedAbritraryExpr vars (size `div` 3) - --- | A default generator: variables a..h, depth 10 -defaultArbitraryExpr :: Gen Expr -defaultArbitraryExpr = boundedAbritraryExpr defaultVars defaultDepth +-- | A default generator: defaultVars is defined above. +-- This is used in the Arbitrary instance for Expr below, +-- but you can also call it (or boundedAbritraryExpr) manually. +defaultArbitraryExpr :: Int -> Gen Expr +defaultArbitraryExpr = boundedAbritraryExpr defaultVars {- A simple shrinker. If we get a failing example then it just tries all of the subexpressions. This will produce a minimal example if, - for example we do something like printing with Unique IDs but - parsing without them (because our test compares names but not IDs, - so we'll be getting things like "b_2" == "b"). If this isn't good - enough, we could also try things like replacing individual leaves - with atoms in failing cases. + for example we do something like printing with Unique IDs but parsing + without them (because our test compares names but not IDs, so we'll + be getting things like "b_2" == "b"). If this isn't good enough, we + could also try things like shrinking subexpressions or replacing + individual subtrees with atoms in failing cases. -} shrinkExpr :: Expr -> [Expr] shrinkExpr (EAppUnOp op e) = [e] @@ -118,13 +138,18 @@ shrinkExpr (EVal _) = [] -- Can't shrink an atom shrinkExpr (EVar _) = [] --- Do we really want to define an instance here? I couldn't --- immediately see any other way to run quickCheck with a specified --- generator and shrinker, but I probably didn't look closely enough. +{-| An instance of Arbitrary for Expr. QuickCheck will use this for + testing properties. In the QuickCheck API documentationI couldn't + see any way to run tests using a specified generator and shrinker + without using an instance , but maybe I didn't look closely enough. + See the note about 'quickCheckWith' above for how to change the size + bound while running tests. +-} instance Arbitrary Expr - where arbitrary = defaultArbitraryExpr + where arbitrary = sized defaultArbitraryExpr shrink = shrinkExpr + --------------------------------------------------------------------------- -- Since we've got a generator, let's use it to test the parser and printer. @@ -150,3 +175,18 @@ prop_checkparse e = let r = parseExpr (toStringNoIDs e) Right f -> forgetIDs f == forgetIDs e + +-- A couple of functions for checking the output of generators +nodes :: Expr -> Int +nodes (EVal b) = 1 +nodes (EVar v) = 1 +nodes (EAppUnOp op e) = 1 + nodes e +nodes (EAppBinOp op e1 e2) = 1 + nodes e1 + nodes e2 +nodes (EIf e e1 e2) = 1 + nodes e + nodes e1 + nodes e2 + +depth :: Expr -> Int +depth (EVal b) = 1 +depth (EVar v) = 1 +depth (EAppUnOp op e) = 1 + depth e +depth (EAppBinOp op e1 e2) = 1 + max (depth e1) (depth e2) +depth (EIf e e1 e2) = 1 + max (depth e) (max (depth e1) (depth e2))