From 4a20dd37e790770125619995f64dabdf97820cc6 Mon Sep 17 00:00:00 2001 From: Shane O'Brien Date: Sat, 19 Jun 2021 11:43:30 +0100 Subject: [PATCH] Simplify `eval` to run directly inside the `Query` monad @tomjaguarpaw at ZuriHac questioned whether the `Evaluation` monad was really unnecessary. And yes, it turns out that the `Evaluation` monad wasn't actually really adding any value. The real issue was Postgres's unspecified evaluation order (which in practice behaved like the broken `ListT` from transformers). We now maintain a stack of bindings from previous subselects in the `Query` monad, which future queries can reference. So for `evalulation`, to ensure that Postgres doesn't try to run a function once where we expect it to be run multiple times, we modify the expression to contain a bunch of superfluous lateral references to the previous queries. This ensures that it gets run every time. --- src/Rel8.hs | 3 - src/Rel8/Query.hs | 170 +++++++++++++++++++++++++++++++++++-- src/Rel8/Query.hs-boot | 4 +- src/Rel8/Query/Evaluate.hs | 83 +++++++----------- src/Rel8/Query/Opaleye.hs | 38 +++++++-- tests/Main.hs | 51 +++++++++++ 6 files changed, 283 insertions(+), 66 deletions(-) diff --git a/src/Rel8.hs b/src/Rel8.hs index 9abe01d3..8792f684 100644 --- a/src/Rel8.hs +++ b/src/Rel8.hs @@ -267,9 +267,6 @@ module Rel8 -- ** Sequences , nextval - - , Evaluate - , eval , evaluate -- * Implementation details diff --git a/src/Rel8/Query.hs b/src/Rel8/Query.hs index e6c90e5e..7b8a6107 100644 --- a/src/Rel8/Query.hs +++ b/src/Rel8/Query.hs @@ -1,5 +1,3 @@ -{-# language DerivingVia #-} -{-# language GeneralizedNewtypeDeriving #-} {-# language StandaloneKindSignatures #-} module Rel8.Query @@ -8,14 +6,22 @@ module Rel8.Query where -- base +import Control.Applicative ( liftA2 ) +import Control.Monad ( liftM2 ) import Data.Kind ( Type ) +import Data.Monoid ( Any( Any ) ) import Prelude -- opaleye -import qualified Opaleye.Select as Opaleye +import qualified Opaleye.Internal.HaskellDB.PrimQuery as Opaleye +import qualified Opaleye.Internal.PackMap as Opaleye +import qualified Opaleye.Internal.PrimQuery as Opaleye +import qualified Opaleye.Internal.QueryArr as Opaleye +import qualified Opaleye.Internal.Tag as Opaleye -- rel8 import Rel8.Query.Set ( unionAll ) +import Rel8.Query.Opaleye ( fromOpaleye ) import Rel8.Query.Values ( values ) import Rel8.Table.Alternative ( AltTable, (<|>:) @@ -23,22 +29,172 @@ import Rel8.Table.Alternative ) -- semigroupoids -import Data.Functor.Apply ( Apply, WrappedApplicative(..) ) +import Data.Functor.Apply ( Apply, (<.>) ) import Data.Functor.Bind ( Bind, (>>-) ) -- | The @Query@ monad allows you to compose a @SELECT@ query. This monad has -- semantics similar to the list (@[]@) monad. type Query :: Type -> Type -newtype Query a = Query (Opaleye.Select a) - deriving newtype (Functor, Applicative, Monad) - deriving Apply via (WrappedApplicative Opaleye.Select) +newtype Query a = + Query ( + -- This is based on Opaleye's Select monad, but with two addtions. We + -- maintain a stack of PrimExprs from parent previous subselects. In + -- practice, these are always the results of dummy calls to random(). + -- + -- We also return a Bool that indicates to the parent subselect whether + -- or not that stack of PrimExprs were used at any point. If they weren't, + -- then the call to random() is never added to the query. + -- + -- This is all needed to implement evaluate. Consider the following code: + -- + -- do + -- x <- values [lit 'a', lit 'b', lit 'c'] + -- y <- evaluate $ nextval "user_id_seq" + -- pure (x, y) + -- + -- If we just used Opaleye's Select monad directly, the SQL would come out + -- like this: + -- + -- SELECT + -- a, b + -- FROM + -- (VALUES ('a'), ('b'), ('c')) Q1(a), + -- LATERAL (SELECT nextval('user_id_seq')) Q2(b); + -- + -- From the Haskell code, you would intuitively expect to get back the + -- results of three different calls to nextval(), but from Postgres' point + -- of view, because the Q2 subquery doesn't reference anything from the Q1 + -- query, it thinks it only needs to call nextval() once. This is actually + -- exactly the same problem you get with the deprecated ListT IO monad from + -- the transformers package — *> behaves differently to >>=, so + -- using ApplicativeDo can change the results of a program. ApplicativeDo + -- is exactly the optimisation Postgres does on a "LATERAL" query that + -- doesn't make any references to previous subselects. + -- + -- Rel8's solution is generate the following SQL instead: + -- + -- SELECT + -- a, b + -- FROM + -- (SELECT + -- random() AS dummy, + -- * + -- FROM + -- (VALUES ('a'), ('b'), ('c')) Q1(a)) Q1, + -- LATERAL (SELECT + -- CASE + -- WHEN dummy IS NOT NULL + -- THEN nextval('user_id_seq') + -- END) Q2(b); + -- + -- We use random() here as the dummy value (and not some constant) because + -- Postgres will again optimize if it sees that a value is constant + -- (and thus only call nextval() once), but because random() is marked as + -- VOLATILE, this inhibits Postgres from doing that optimisation. + -- + -- Why not just reference the a column from the previous query directly + -- instead of adding a dummy value? Basically, even if we extract out all + -- the bindings introduced in a PrimQuery, we can't always be sure which + -- ones refer to constant values, so if we end up laterally referencing a + -- constant value, then all of this would be for nothing. + -- + -- Why not just add the call to the previous subselect directly, like so: + -- + -- SELECT + -- a, b + -- FROM + -- (SELECT + -- nextval('user_id_seq') AS eval, + -- * + -- FROM + -- (VALUES ('a'), ('b'), ('c')) Q1(a)) Q1, + -- LATERAL (SELECT eval) Q2(b); + -- + -- That would work in this case. But consider the following Rel8 code: + -- + -- do + -- x <- values [lit 'a', lit 'b', lit 'c'] + -- y <- values [lit 'd', lit 'e', lit 'f'] + -- z <- evaluate $ nextval "user_id_seq" + -- pure (x, y, z) + -- + -- How many calls to nextval should there be? Our Haskell intuition says + -- nine. But that's not what you would get if you used the above + -- technique. The problem is, which VALUES query should the nextval be + -- added to? You can choose one or the other to get three calls to + -- nextval, but you still need to make a superfluous LATERAL references to + -- the other if you want nine calls. So for the above Rel8 code we generate + -- the following SQL: + -- + -- SELECT + -- a, b, c + -- FROM + -- (SELECT + -- random() AS dummy, + -- * + -- FROM + -- (VALUES ('a'), ('b'), ('c')) Q1(a)) Q1, + -- (SELECT + -- random() AS dummy, + -- * + -- FROM + -- (VALUES ('d'), ('e'), ('f')) Q2(b)) Q2, + -- LATERAL (SELECT + -- CASE + -- WHEN Q1.dummy IS NOT NULL AND Q2.dummy IS NOT NULL + -- THEN nextval('user_id_seq') + -- END) Q3(c); + -- + -- This gives nine calls to nextval() as we would expect. + [Opaleye.PrimExpr] -> Opaleye.Select (Any, a) + ) + + +instance Functor Query where + fmap f (Query a) = Query (fmap (fmap (fmap f)) a) + + +instance Apply Query where + (<.>) = (<*>) + + +instance Applicative Query where + pure = fromOpaleye . pure + liftA2 = liftM2 instance Bind Query where (>>-) = (>>=) +instance Monad Query where + Query q >>= f = Query $ \dummies -> Opaleye.QueryArr $ \(_, query, tag) -> + let + Opaleye.QueryArr qa = q dummies + ((m, a), query', tag') = qa ((), query, tag) + Query q' = f a + (dummies', lquery, tag'') = + ( dummy : dummies + , Opaleye.Rebind True bindings query' + , Opaleye.next tag' + ) + where + (dummy, bindings) = Opaleye.run $ name random + where + random = Opaleye.FunExpr "random" [] + name = Opaleye.extractAttr "dummy" tag' + Opaleye.QueryArr qa' = Opaleye.lateral $ \_ -> q' dummies' + ((m'@(Any needsDummies), b), rquery, tag''') = qa' ((), Opaleye.Unit, tag'') + lquery' + | needsDummies = lquery + | otherwise = query' + query'''' = Opaleye.times lquery' rquery + m'' = m <> m' + in + ((m'', b), query'''', tag''') + + -- | '<|>:' = 'unionAll'. instance AltTable Query where (<|>:) = unionAll diff --git a/src/Rel8/Query.hs-boot b/src/Rel8/Query.hs-boot index df92ad4f..cacfeed4 100644 --- a/src/Rel8/Query.hs-boot +++ b/src/Rel8/Query.hs-boot @@ -7,11 +7,13 @@ where -- base import Data.Kind ( Type ) +import Data.Monoid ( Any ) import Prelude () -- opaleye +import qualified Opaleye.Internal.HaskellDB.PrimQuery as Opaleye import qualified Opaleye.Select as Opaleye type Query :: Type -> Type -newtype Query a = Query (Opaleye.Select a) +newtype Query a = Query ([Opaleye.PrimExpr] -> Opaleye.Select (Any, a)) diff --git a/src/Rel8/Query/Evaluate.hs b/src/Rel8/Query/Evaluate.hs index 33dd7c00..86fe623c 100644 --- a/src/Rel8/Query/Evaluate.hs +++ b/src/Rel8/Query/Evaluate.hs @@ -1,20 +1,18 @@ -{-# language DerivingStrategies #-} {-# language FlexibleContexts #-} -{-# language GeneralizedNewtypeDeriving #-} -{-# language NamedFieldPuns #-} -{-# language StandaloneKindSignatures #-} +{-# language TupleSections #-} module Rel8.Query.Evaluate - ( Evaluate - , eval - , evaluate + ( evaluate + , rebind ) where -- base -import Data.Kind ( Type ) -import Data.Monoid ( Endo ( Endo ), appEndo ) -import Prelude +import Control.Monad ( (>=>) ) +import Data.Foldable ( foldl' ) +import Data.List.NonEmpty ( NonEmpty( (:|) ), nonEmpty ) +import Data.Monoid ( Any( Any ) ) +import Prelude hiding ( undefined ) -- opaleye import qualified Opaleye.Internal.HaskellDB.PrimQuery as Opaleye @@ -26,57 +24,42 @@ import qualified Opaleye.Internal.Unpackspec as Opaleye -- rel8 import Rel8.Expr ( Expr ) +import Rel8.Expr.Bool ( (&&.) ) +import Rel8.Expr.Opaleye ( fromPrimExpr ) import Rel8.Query ( Query( Query ) ) import Rel8.Table ( Table ) +import Rel8.Table.Bool ( case_ ) import Rel8.Table.Opaleye ( unpackspec ) +import Rel8.Table.Undefined --- semigroupoids -import Data.Functor.Apply ( Apply ) -import Data.Functor.Bind ( Bind, (>>-) ) --- transformers -import Control.Monad.Trans.State.Strict ( State, get, put, runState ) +-- | 'evaluate' takes expressions that could potentially have side effects and +-- \"runs\" them in the 'Query' monad. The returned expressions have no side +-- effects and can safely be reused. +evaluate :: Table Expr a => a -> Query a +evaluate = laterally >=> rebind -type Evaluations :: Type -data Evaluations = Evaluations - { tag :: !Opaleye.Tag - , bindings :: !(Endo (Opaleye.Bindings Opaleye.PrimExpr)) - } +laterally :: Table Expr a => a -> Query a +laterally a = Query $ \bindings -> pure $ (Any True,) $ + case nonEmpty bindings of + Nothing -> a + Just bindings' -> case_ [(condition, a)] undefined + where + condition = foldl1' (&&.) (fmap go bindings') + where + go = fromPrimExpr . Opaleye.UnExpr Opaleye.OpIsNotNull --- | Some PostgreSQL functions, such as 'Rel8.nextval', have side effects, --- breaking the referential transparency we would otherwise enjoy. --- --- To try to recover our ability to reason about such expressions, 'Evaluate' --- allows us to control the evaluation order of side-effects by sequencing --- them monadically. -type Evaluate :: Type -> Type -newtype Evaluate a = Evaluate (State Evaluations a) - deriving newtype (Functor, Apply, Applicative, Monad) - - -instance Bind Evaluate where - (>>-) = (>>=) - - --- | 'eval' takes expressions that could potentially have side effects and --- \"runs\" them in the 'Evaluate' monad. The returned expressions have no --- side effetcs and can safely be reused. -eval :: Table Expr a => a -> Evaluate a -eval a = Evaluate $ do - Evaluations {tag, bindings} <- get +rebind :: Table Expr a => a -> Query a +rebind a = Query $ \_ -> Opaleye.QueryArr $ \(_, query, tag) -> let tag' = Opaleye.next tag - (a', bindings') = Opaleye.run $ + (a', bindings) = Opaleye.run $ Opaleye.runUnpackspec unpackspec (Opaleye.extractAttr "eval" tag') a - put Evaluations {tag = tag', bindings = bindings <> Endo (bindings' ++)} - pure a' + in + ((mempty, a'), Opaleye.Rebind True bindings query, tag') --- | 'evaluate' runs an 'Evaluate' inside the 'Query' monad. -evaluate :: Evaluate a -> Query a -evaluate (Evaluate m) = Query $ Opaleye.QueryArr $ \(_, query, tag) -> - case runState m (Evaluations tag mempty) of - (a, Evaluations {tag = tag', bindings}) -> - (a, Opaleye.Rebind True (appEndo bindings mempty) query, tag') +foldl1' :: (a -> a -> a) -> NonEmpty a -> a +foldl1' f (a :| as) = foldl' f a as diff --git a/src/Rel8/Query/Opaleye.hs b/src/Rel8/Query/Opaleye.hs index a78b625a..e86c89ef 100644 --- a/src/Rel8/Query/Opaleye.hs +++ b/src/Rel8/Query/Opaleye.hs @@ -1,3 +1,5 @@ +{-# language TupleSections #-} + module Rel8.Query.Opaleye ( fromOpaleye , toOpaleye @@ -7,28 +9,54 @@ module Rel8.Query.Opaleye where -- base +import Control.Applicative ( liftA2 ) import Prelude -- opaleye -import qualified Opaleye.Select as Opaleye +import qualified Opaleye.Internal.QueryArr as Opaleye -- rel8 import {-# SOURCE #-} Rel8.Query ( Query( Query ) ) fromOpaleye :: Opaleye.Select a -> Query a -fromOpaleye = Query +fromOpaleye = Query . pure . fmap pure toOpaleye :: Query a -> Opaleye.Select a -toOpaleye (Query a) = a +toOpaleye (Query a) = snd <$> a mempty mapOpaleye :: (Opaleye.Select a -> Opaleye.Select b) -> Query a -> Query b -mapOpaleye f = fromOpaleye . f . toOpaleye +mapOpaleye f (Query a) = Query (fmap (mapping f) a) zipOpaleyeWith :: () => (Opaleye.Select a -> Opaleye.Select b -> Opaleye.Select c) -> Query a -> Query b -> Query c -zipOpaleyeWith f a b = fromOpaleye $ f (toOpaleye a) (toOpaleye b) +zipOpaleyeWith f (Query a) (Query b) = Query $ liftA2 (zipping f) a b + + +mapping :: () + => (Opaleye.Select a -> Opaleye.Select b) + -> Opaleye.Select (m, a) -> Opaleye.Select (m, b) +mapping f q@(Opaleye.QueryArr qa) = Opaleye.QueryArr $ \(_, query, tag) -> + let + ((m, _), _, _) = qa ((), query, tag) + Opaleye.QueryArr qa' = (m,) <$> f (snd <$> q) + in + qa' ((), query, tag) + + +zipping :: Semigroup m + => (Opaleye.Select a -> Opaleye.Select b -> Opaleye.Select c) + -> Opaleye.Select (m, a) -> Opaleye.Select (m, b) -> Opaleye.Select (m, c) +zipping f q@(Opaleye.QueryArr qa) q'@(Opaleye.QueryArr qa') = + Opaleye.QueryArr $ \(_, query, tag) -> + let + ((m, _), _, _) = qa ((), query, tag) + ((m', _), _, _) = qa' ((), query, tag) + m'' = m <> m' + Opaleye.QueryArr qa'' = (m'',) <$> f (snd <$> q) (snd <$> q') + in + qa'' ((), query, tag) diff --git a/tests/Main.hs b/tests/Main.hs index 7a1ffa58..d15a4121 100644 --- a/tests/Main.hs +++ b/tests/Main.hs @@ -121,6 +121,7 @@ tests = , testSelectNestedPairs getTestDatabase , testSelectArray getTestDatabase , testNestedMaybeTable getTestDatabase + , testEvaluate getTestDatabase ] where @@ -132,6 +133,7 @@ tests = flip run conn do sql "CREATE EXTENSION citext" sql "CREATE TABLE test_table ( column1 text not null, column2 bool not null )" + sql "CREATE SEQUENCE test_seq" return db @@ -728,3 +730,52 @@ testNestedMaybeTable = databasePropertyTest "Can nest MaybeTable within other ta pure $ Rel8.maybeTable (Rel8.lit False) (\_ -> Rel8.lit True) (nmt2 x) selected === [True] + + +testEvaluate :: IO TmpPostgres.DB -> TestTree +testEvaluate = databasePropertyTest "evaluate has the evaluation order we expect" \transaction -> do + + transaction \connection -> do + selected <- liftIO $ Rel8.select connection do + x <- Rel8.values (Rel8.lit <$> ['a', 'b', 'c']) + y <- Rel8.evaluate (Rel8.nextval "test_seq") + pure (x, (y, y)) + + normalize selected === + [ ('a', (0, 0)) + , ('b', (1, 1)) + , ('c', (2, 2)) + ] + + selected' <- liftIO $ Rel8.select connection do + x <- Rel8.values (Rel8.lit <$> ['a', 'b', 'c']) + y <- Rel8.values (Rel8.lit <$> ['d', 'e', 'f']) + z <- Rel8.evaluate (Rel8.nextval "test_seq") + pure ((x, y), (z, z)) + + normalize selected' === + [ (('a', 'd'), (0, 0)) + , (('b', 'd'), (1, 1)) + , (('c', 'd'), (2, 2)) + , (('a', 'e'), (3, 3)) + , (('b', 'e'), (4, 4)) + , (('c', 'e'), (5, 5)) + , (('a', 'f'), (6, 6)) + , (('b', 'f'), (7, 7)) + , (('c', 'f'), (8, 8)) + ] + + where + normalize :: [(x, (Int64, Int64))] -> [(x, (Int64, Int64))] + normalize [] = [] + normalize xs@((_, (i, _)) : _) = map (fmap (\(a, b) -> (a - i, b - i))) xs + +{- + selected <- liftIO $ Rel8.select connection do + x <- Rel8.values (Rel8.lit <$> ['a', 'b', 'c']) + x <- Rel8.values (Rel8.lit <$> ['d', 'e', 'f']) + y <- Rel8.evaluate (Rel8.nextval "test_seq") + pure (x, y, y) + + selected === [('a', 1, 1), ('b', 2, 2), ('c', 3, 3)] +-}