Skip to content

Commit

Permalink
Add fingerprint of Generic representation when serializing.
Browse files Browse the repository at this point in the history
The idea is we can use Rep to get a full, structural representation
of a type, and the fingerprint it using Typeable.  This gives
us a very concise way of fingerprinting our Binary representation.

This patch is not completely correct; the fingerprint needs
to be overridable when someone writes a custom Binary instance.
But this should be "good enough" in practice; we're not using
these fingerprints to check anything security critical.

TODO: Not sure if I have tagged all the call-sites which could
profit from this.

Fixes haskell#4059.

Signed-off-by: Edward Z. Yang <ezyang@cs.stanford.edu>
  • Loading branch information
ezyang committed Nov 5, 2016
1 parent 39d11e5 commit ebcae71
Show file tree
Hide file tree
Showing 7 changed files with 122 additions and 19 deletions.
1 change: 1 addition & 0 deletions Cabal/Cabal.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,7 @@ library
Distribution.Utils.NubList
Distribution.Utils.ShortText
Distribution.Utils.Progress
Distribution.Utils.BinaryWithFingerprint
Distribution.Verbosity
Distribution.Version
Language.Haskell.Extension
Expand Down
6 changes: 3 additions & 3 deletions Cabal/Distribution/Simple/Configure.hs
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ import qualified Distribution.Simple.HaskellSuite as HaskellSuite

import Control.Exception
( ErrorCall, Exception, evaluate, throw, throwIO, try )
import Distribution.Compat.Binary ( decodeOrFailIO, encode )
import Distribution.Utils.BinaryWithFingerprint
import Data.ByteString.Lazy (ByteString)
import qualified Data.ByteString as BS
import qualified Data.ByteString.Lazy.Char8 as BLC8
Expand Down Expand Up @@ -195,7 +195,7 @@ getConfigStateFile filename = do
Right x -> x

let getStoredValue = do
result <- decodeOrFailIO (BLC8.tail body)
result <- decodeWithFingerprintOrFailIO (BLC8.tail body)
case result of
Left _ -> throw ConfigStateFileNoParse
Right x -> return x
Expand Down Expand Up @@ -240,7 +240,7 @@ writePersistBuildConfig :: FilePath -- ^ The @dist@ directory path.
writePersistBuildConfig distPref lbi = do
createDirectoryIfMissing False distPref
writeFileAtomic (localBuildInfoFile distPref) $
BLC8.unlines [showHeader pkgId, encode lbi]
BLC8.unlines [showHeader pkgId, encodeWithFingerprint lbi]
where
pkgId = localPackage lbi

Expand Down
87 changes: 87 additions & 0 deletions Cabal/Distribution/Utils/BinaryWithFingerprint.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
{-# LANGUAGE CPP #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE FlexibleContexts #-}
-- | Support for binary serialization with a fingerprint.
module Distribution.Utils.BinaryWithFingerprint (
encodeWithFingerprint,
decodeWithFingerprint,
decodeWithFingerprintOrFailIO,
) where

#if MIN_VERSION_base(4,8,0)

import Distribution.Compat.Binary
import Data.ByteString.Lazy (ByteString)
import Control.Exception

import Data.Binary.Get
import Data.Binary.Put

import GHC.Generics
import GHC.Fingerprint
import Data.Typeable
import Control.Monad

-- | Private wrapper type so we can give 'Binary' instance for
-- 'Fingerprint'
newtype FP = FP Fingerprint

instance Binary FP where
put (FP (Fingerprint a b)) = put a >> put b
get = do
a <- get
b <- get
return (FP (Fingerprint a b))

fingerprintRep :: forall a. Typeable (Rep a) => Proxy a -> Fingerprint
fingerprintRep _ = typeRepFingerprint (typeRep (Proxy :: Proxy (Rep a)))

-- | Encode a value, recording a fingerprint in the header.
--
-- The fingerprint is GHC's Typeable fingerprint associated with
-- the Generic Rep of a type: this fingerprint is better than
-- the fingerprint of the type itself, as it changes when the
-- representation changes (and thus the binary serialization format
-- changes.)
--
encodeWithFingerprint :: forall a. (Binary a, Typeable (Rep a)) => a -> ByteString
encodeWithFingerprint x = runPut $ do
put (FP (fingerprintRep (Proxy :: Proxy a)))
put x

-- | Decode a value, verifying the fingerprint in the header.
--
decodeWithFingerprint :: forall a. (Binary a, Typeable (Rep a)) => ByteString -> a
decodeWithFingerprint = runGet $ do
FP fp <- get
let expect_fp = fingerprintRep (Proxy :: Proxy a)
when (expect_fp /= fp) $
fail $ "Expected fingerprint " ++ show expect_fp ++
" but got " ++ show fp
get

-- | Decode a value, forcing the decoded value to discover decoding errors
-- and report them.
--
decodeWithFingerprintOrFailIO :: (Binary a, Typeable (Rep a)) => ByteString -> IO (Either String a)
decodeWithFingerprintOrFailIO bs =
catch (evaluate (decodeWithFingerprint bs) >>= return . Right)
$ \(ErrorCall str) -> return $ Left str

#else

import Distribution.Compat.Binary
import Data.ByteString.Lazy (ByteString)

-- Dummy implementations that don't actually save fingerprints

encodeWithFingerprint :: Binary a => a -> ByteString
encodeWithFingerprint = encode

decodeWithFingerprint :: Binary a => ByteString -> a
decodeWithFingerprint = decode

decodeWithFingerprintOrFailIO :: Binary a => ByteString -> IO (Either String a)
decodeWithFingerprintOrFailIO = decodeOrFailIO

#endif
16 changes: 8 additions & 8 deletions cabal-install/Distribution/Client/FileMonitor.hs
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ import qualified Data.Map.Strict as Map
import qualified Data.Map as Map
#endif
import qualified Data.ByteString.Lazy as BS
import qualified Distribution.Compat.Binary as Binary
import qualified Distribution.Utils.BinaryWithFingerprint as Binary
import qualified Data.Hashable as Hashable

import Control.Monad
Expand Down Expand Up @@ -403,7 +403,7 @@ data MonitorChangedReason a =
-- See 'FileMonitor' for a full explanation.
--
checkFileMonitorChanged
:: (Binary a, Binary b)
:: (Binary a, Binary b, Typeable a, Typeable b)
=> FileMonitor a b -- ^ cache file path
-> FilePath -- ^ root directory
-> a -- ^ guard or key value
Expand Down Expand Up @@ -481,23 +481,23 @@ checkFileMonitorChanged
--
-- This determines the type and format of the binary cache file.
--
readCacheFile :: (Binary a, Binary b)
readCacheFile :: (Binary a, Binary b, Typeable a, Typeable b)
=> FileMonitor a b
-> IO (Either String (MonitorStateFileSet, a, b))
readCacheFile FileMonitor {fileMonitorCacheFile} =
withBinaryFile fileMonitorCacheFile ReadMode $ \hnd ->
Binary.decodeOrFailIO =<< BS.hGetContents hnd
Binary.decodeWithFingerprintOrFailIO =<< BS.hGetContents hnd

-- | Helper for writing the cache file.
--
-- This determines the type and format of the binary cache file.
--
rewriteCacheFile :: (Binary a, Binary b)
rewriteCacheFile :: (Binary a, Binary b, Typeable a, Typeable b)
=> FileMonitor a b
-> MonitorStateFileSet -> a -> b -> IO ()
rewriteCacheFile FileMonitor {fileMonitorCacheFile} fileset key result =
writeFileAtomic fileMonitorCacheFile $
Binary.encode (fileset, key, result)
Binary.encodeWithFingerprint (fileset, key, result)

-- | Probe the file system to see if any of the monitored files have changed.
--
Expand Down Expand Up @@ -758,7 +758,7 @@ probeMonitorStateGlobRel _ _ _ _ MonitorStateGlobDirTrailing =
-- any files then you can use @Nothing@ for the timestamp parameter.
--
updateFileMonitor
:: (Binary a, Binary b)
:: (Binary a, Binary b, Typeable a, Typeable b)
=> FileMonitor a b -- ^ cache file path
-> FilePath -- ^ root directory
-> Maybe MonitorTimestamp -- ^ timestamp when the update action started
Expand Down Expand Up @@ -965,7 +965,7 @@ getFileHash hashcache relfile absfile mtime =
-- that the set of files to monitor can change then it's simpler just to throw
-- away the structure and use a finite map.
--
readCacheFileHashes :: (Binary a, Binary b)
readCacheFileHashes :: (Binary a, Binary b, Typeable a, Typeable b)
=> FileMonitor a b -> IO FileHashCache
readCacheFileHashes monitor =
handleDoesNotExist Map.empty $
Expand Down
18 changes: 16 additions & 2 deletions cabal-install/Distribution/Client/ProjectPlanOutput.hs
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
{-# LANGUAGE BangPatterns, RecordWildCards, NamedFieldPuns,
DeriveGeneric, DeriveDataTypeable, GeneralizedNewtypeDeriving,
ScopedTypeVariables #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE UndecidableInstances #-}

module Distribution.Client.ProjectPlanOutput (
-- * Plan output
Expand Down Expand Up @@ -39,6 +41,7 @@ import Distribution.Text
import qualified Distribution.Compat.Graph as Graph
import Distribution.Compat.Graph (Graph, Node)
import qualified Distribution.Compat.Binary as Binary
import qualified Distribution.Utils.BinaryWithFingerprint as Binary
import Distribution.Simple.Utils
import Distribution.Verbosity
import qualified Paths_cabal_install as Our (version)
Expand All @@ -51,6 +54,7 @@ import qualified Data.Set as Set
import qualified Data.ByteString.Lazy as BS
import qualified Data.ByteString.Builder as BB

import GHC.Generics
import System.FilePath
import System.IO

Expand Down Expand Up @@ -310,6 +314,15 @@ encodePlanAsJson distDirLayout elaboratedInstallPlan elaboratedSharedConfig =
type PackageIdSet = Set UnitId
type PackagesUpToDate = PackageIdSet

newtype PackagesUpToDateG = PackagesUpToDateG { unPackagesUpToDateG :: PackagesUpToDate }

instance Binary.Binary PackagesUpToDateG

instance Generic PackagesUpToDateG where
type Rep PackagesUpToDateG = Rep [UnitId]
from = from . Set.toList . unPackagesUpToDateG
to = PackagesUpToDateG . Set.fromList . to

data PostBuildProjectStatus = PostBuildProjectStatus {

-- | Packages that are known to be up to date. These were found to be
Expand Down Expand Up @@ -628,7 +641,8 @@ readPackagesUpToDateCacheFile DistDirLayout{distProjectCacheFile} =
handleDoesNotExist Set.empty $
handleDecodeFailure $
withBinaryFile (distProjectCacheFile "up-to-date") ReadMode $ \hnd ->
Binary.decodeOrFailIO =<< BS.hGetContents hnd
fmap (fmap unPackagesUpToDateG) .
Binary.decodeWithFingerprintOrFailIO =<< BS.hGetContents hnd
where
handleDecodeFailure = fmap (either (const Set.empty) id)

Expand All @@ -639,7 +653,7 @@ readPackagesUpToDateCacheFile DistDirLayout{distProjectCacheFile} =
writePackagesUpToDateCacheFile :: DistDirLayout -> PackagesUpToDate -> IO ()
writePackagesUpToDateCacheFile DistDirLayout{distProjectCacheFile} upToDate =
writeFileAtomic (distProjectCacheFile "up-to-date") $
Binary.encode upToDate
Binary.encodeWithFingerprint (PackagesUpToDateG upToDate)

-- Writing .ghc.environment files
--
Expand Down
2 changes: 1 addition & 1 deletion cabal-install/Distribution/Client/RebuildMonad.hs
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ askRoot = Rebuild Reader.ask
--
-- Do not share 'FileMonitor's between different uses of 'rerunIfChanged'.
--
rerunIfChanged :: (Binary a, Binary b)
rerunIfChanged :: (Binary a, Binary b, Typeable a, Typeable b)
=> Verbosity
-> FileMonitor a b
-> a
Expand Down
11 changes: 6 additions & 5 deletions cabal-install/tests/UnitTests/Distribution/Client/FileMonitor.hs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
module UnitTests.Distribution.Client.FileMonitor (tests) where

import Data.Typeable
import Control.Monad
import Control.Exception
import Control.Concurrent (threadDelay)
Expand Down Expand Up @@ -811,7 +812,7 @@ monitorFileGlobStr globstr
| otherwise = error $ "Failed to parse " ++ globstr


expectMonitorChanged :: (Binary a, Binary b)
expectMonitorChanged :: (Binary a, Binary b, Typeable a, Typeable b)
=> RootPath -> FileMonitor a b -> a
-> IO (MonitorChangedReason a)
expectMonitorChanged root monitor key = do
Expand All @@ -820,7 +821,7 @@ expectMonitorChanged root monitor key = do
MonitorChanged reason -> return reason
MonitorUnchanged _ _ -> throwIO $ HUnitFailure "expected change"

expectMonitorUnchanged :: (Binary a, Binary b)
expectMonitorUnchanged :: (Binary a, Binary b, Typeable a, Typeable b)
=> RootPath -> FileMonitor a b -> a
-> IO (b, [MonitorFilePath])
expectMonitorUnchanged root monitor key = do
Expand All @@ -829,19 +830,19 @@ expectMonitorUnchanged root monitor key = do
MonitorChanged _reason -> throwIO $ HUnitFailure "expected no change"
MonitorUnchanged b files -> return (b, files)

checkChanged :: (Binary a, Binary b)
checkChanged :: (Binary a, Binary b, Typeable a, Typeable b)
=> RootPath -> FileMonitor a b
-> a -> IO (MonitorChanged a b)
checkChanged (RootPath root) monitor key =
checkFileMonitorChanged monitor root key

updateMonitor :: (Binary a, Binary b)
updateMonitor :: (Binary a, Binary b, Typeable a, Typeable b)
=> RootPath -> FileMonitor a b
-> [MonitorFilePath] -> a -> b -> IO ()
updateMonitor (RootPath root) monitor files key result =
updateFileMonitor monitor root Nothing files key result

updateMonitorWithTimestamp :: (Binary a, Binary b)
updateMonitorWithTimestamp :: (Binary a, Binary b, Typeable a, Typeable b)
=> RootPath -> FileMonitor a b -> MonitorTimestamp
-> [MonitorFilePath] -> a -> b -> IO ()
updateMonitorWithTimestamp (RootPath root) monitor timestamp files key result =
Expand Down

0 comments on commit ebcae71

Please sign in to comment.