Skip to content

Commit

Permalink
Improve the performance of GetModIfaceFromDisk in large repos and del…
Browse files Browse the repository at this point in the history
…ete GetDependencies (#2323)

* Improve the performance of GetModIfaceFromDisk in large repos

There are three benefits:
1. GetModIfaceFromDisk and GhcSessionDeps no longer depend on the transitive module summaries. This means fewer edges in the build graph = smaller build graph = faster builds
2. Avoid duplicate computations in setting up the GHC session with the dependencies of the module. Previously the total work done was O(NlogN) in the number of transitive dependencies, now it is O(N).
3. Increased sharing of HPT and FinderCache. Ideally we should also
   share the module graphs, but the datatype is abstract, doesn't have a
   monoid instance, and cannot be coerced to something that has. We will
   need to add the Monoid instance in GHC first.

On the Sigma repo:
- the startup metric goes down by ~34%.
- The edit metric also goes down by 15%.
- Max residency is down by 30% in the edit benchmark.

* format importes

* clean up

* remove stale comment

* fix build in GHC 9

* clean up

* Unify defintions of ghcSessionDeps

* mark test as no longer failing

* Prevent duplicate missing module diagnostics

* delete GetDependencies

* add a test for deeply nested import cycles

* Fix build in GHC 9.0

* bump ghcide version

* Introduce config options for the main rules

Surfacing the performance tradeoffs in the core build rules

* Avoid using the Monoid instance (removed in 9.4 ?????)

* Fix build with GHC 9

* Fix Eval plugin
  • Loading branch information
pepeiborra committed Nov 16, 2021
1 parent aa99021 commit 9514622
Show file tree
Hide file tree
Showing 14 changed files with 163 additions and 113 deletions.
2 changes: 1 addition & 1 deletion ghcide/.hlint.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@
# Things that are unsafe in Haskell base library
- {name: unsafeInterleaveIO, within: [Development.IDE.LSP.LanguageServer]}
- {name: unsafeDupablePerformIO, within: []}
- {name: unsafeCoerce, within: [Ide.Plugin.Eval.Code, Development.IDE.Types.Shake]}
- {name: unsafeCoerce, within: [Ide.Plugin.Eval.Code, Development.IDE.Core.Compile, Development.IDE.Types.Shake]}
# Things that are a bit dangerous in the GHC API
- {name: nameModule, within: []}

Expand Down
3 changes: 2 additions & 1 deletion ghcide/exe/Main.hs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ module Main(main) where
import Arguments (Arguments (..),
getArguments)
import Control.Monad.Extra (unless, whenJust)
import Data.Default (def)
import Data.Version (showVersion)
import Development.GitRev (gitHash)
import Development.IDE (Priority (Debug, Info),
Expand Down Expand Up @@ -60,7 +61,7 @@ main = withTelemetryLogger $ \telemetryLogger -> do

,Main.argsRules = do
-- install the main and ghcide-plugin rules
mainRule
mainRule def
-- install the kick action, which triggers a typecheck on every
-- Shake database restart, i.e. on every user edit.
unless argsDisableKick $
Expand Down
2 changes: 1 addition & 1 deletion ghcide/ghcide.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ cabal-version: 2.4
build-type: Simple
category: Development
name: ghcide
version: 1.4.2.3
version: 1.4.2.4
license: Apache-2.0
license-file: LICENSE
author: Digital Asset and Ghcide contributors
Expand Down
31 changes: 30 additions & 1 deletion ghcide/src/Development/IDE/Core/Compile.hs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ module Development.IDE.Core.Compile
, setupFinderCache
, getDocsBatch
, lookupName
) where
,mergeEnvs) where

import Development.IDE.Core.Preprocessor
import Development.IDE.Core.RuleTypes
Expand Down Expand Up @@ -89,8 +89,10 @@ import System.Directory
import System.FilePath
import System.IO.Extra (fixIO, newTempFileWithin)

-- GHC API imports
-- GHC API imports
import GHC (GetDocsFailure (..),
mgModSummaries,
parsedSource)

import Control.Concurrent.Extra
Expand All @@ -100,11 +102,14 @@ import Data.Binary
import Data.Coerce
import Data.Functor
import qualified Data.HashMap.Strict as HashMap
import Data.Map (Map)
import Data.Tuple.Extra (dupe)
import Data.Unique as Unique
import Development.IDE.Core.Tracing (withTrace)
import Development.IDE.GHC.Compat.Util (emptyUDFM, plusUDFM)
import qualified Language.LSP.Server as LSP
import qualified Language.LSP.Types as LSP
import Unsafe.Coerce

-- | Given a string buffer, return the string (after preprocessing) and the 'ParsedModule'.
parseModule
Expand Down Expand Up @@ -686,6 +691,30 @@ loadModulesHome mod_infos e =
where
mod_name = moduleName . mi_module . hm_iface

-- Merge the HPTs, module graphs and FinderCaches
mergeEnvs :: HscEnv -> [ModSummary] -> [HomeModInfo] -> [HscEnv] -> IO HscEnv
mergeEnvs env extraModSummaries extraMods envs = do
prevFinderCache <- concatFC <$> mapM (readIORef . hsc_FC) envs
let ims = map (Compat.installedModule (homeUnitId_ $ hsc_dflags env) . moduleName . ms_mod) extraModSummaries
ifrs = zipWith (\ms -> InstalledFound (ms_location ms)) extraModSummaries ims
newFinderCache <- newIORef $
foldl'
(\fc (im, ifr) -> Compat.extendInstalledModuleEnv fc im ifr) prevFinderCache
$ zip ims ifrs
return $ loadModulesHome extraMods $ env{
hsc_HPT = foldMapBy plusUDFM emptyUDFM hsc_HPT envs,
hsc_FC = newFinderCache,
hsc_mod_graph = mkModuleGraph $ extraModSummaries ++ nubOrdOn ms_mod (concatMap (mgModSummaries . hsc_mod_graph) envs)
}
where
-- required because 'FinderCache':
-- 1) doesn't have a 'Monoid' instance,
-- 2) is abstract and doesn't export constructors
-- To work around this, we coerce to the underlying type
-- To remove this, I plan to upstream the missing Monoid instance
concatFC :: [FinderCache] -> FinderCache
concatFC = unsafeCoerce (mconcat @(Map InstalledModule InstalledFindResult))

withBootSuffix :: HscSource -> ModLocation -> ModLocation
withBootSuffix HsBootFile = addBootSuffixLocnOut
withBootSuffix _ = id
Expand Down
10 changes: 1 addition & 9 deletions ghcide/src/Development/IDE/Core/RuleTypes.hs
Original file line number Diff line number Diff line change
Expand Up @@ -73,10 +73,6 @@ type instance RuleResult GetParsedModuleWithComments = ParsedModule
-- a module could not be parsed or an import cycle.
type instance RuleResult GetDependencyInformation = DependencyInformation

-- | Transitive module and pkg dependencies based on the information produced by GetDependencyInformation.
-- This rule is also responsible for calling ReportImportCycles for each file in the transitive closure.
type instance RuleResult GetDependencies = TransitiveDependencies

type instance RuleResult GetModuleGraph = DependencyInformation

data GetKnownTargets = GetKnownTargets
Expand Down Expand Up @@ -234,6 +230,7 @@ type instance RuleResult GetDocMap = DocAndKindMap
type instance RuleResult GhcSession = HscEnvEq

-- | A GHC session preloaded with all the dependencies
-- This rule is also responsible for calling ReportImportCycles for the direct dependencies
type instance RuleResult GhcSessionDeps = HscEnvEq

-- | Resolve the imports in a module to the file path of a module in the same package
Expand Down Expand Up @@ -389,11 +386,6 @@ data ReportImportCycles = ReportImportCycles
instance Hashable ReportImportCycles
instance NFData ReportImportCycles

data GetDependencies = GetDependencies
deriving (Eq, Show, Typeable, Generic)
instance Hashable GetDependencies
instance NFData GetDependencies

data TypeCheck = TypeCheck
deriving (Eq, Show, Typeable, Generic)
instance Hashable TypeCheck
Expand Down
124 changes: 69 additions & 55 deletions ghcide/src/Development/IDE/Core/Rules.hs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
--
module Development.IDE.Core.Rules(
-- * Types
IdeState, GetDependencies(..), GetParsedModule(..), TransitiveDependencies(..),
IdeState, GetParsedModule(..), TransitiveDependencies(..),
Priority(..), GhcSessionIO(..), GetClientSettings(..),
-- * Functions
priorityTypeCheck,
Expand All @@ -22,6 +22,7 @@ module Development.IDE.Core.Rules(
defineNoFile,
defineEarlyCutOffNoFile,
mainRule,
RulesConfig(..),
getDependencies,
getParsedModule,
getParsedModuleWithComments,
Expand All @@ -35,7 +36,6 @@ module Development.IDE.Core.Rules(
getLocatedImportsRule,
getDependencyInformationRule,
reportImportCyclesRule,
getDependenciesRule,
typeCheckRule,
getDocMapRule,
loadGhcSession,
Expand All @@ -57,6 +57,7 @@ module Development.IDE.Core.Rules(
ghcSessionDepsDefinition,
getParsedModuleDefinition,
typeCheckRuleDefinition,
GhcSessionDepsConfig(..),
) where

#if !MIN_VERSION_ghc(8,8,0)
Expand Down Expand Up @@ -139,8 +140,7 @@ import qualified Language.LSP.Server as LSP
import Language.LSP.Types (SMethod (SCustomMethod))
import Language.LSP.VFS
import System.Directory (canonicalizePath, makeAbsolute)

import Data.Default (def)
import Data.Default (def, Default)
import Ide.Plugin.Properties (HasProperty,
KeyNameProxy,
Properties,
Expand All @@ -149,7 +149,6 @@ import Ide.Plugin.Properties (HasProperty,
import Ide.PluginUtils (configForPlugin)
import Ide.Types (DynFlagsModifications (dynFlagsModifyGlobal, dynFlagsModifyParser),
PluginId)
import qualified Data.HashSet as HS

-- | This is useful for rules to convert rules that can only produce errors or
-- a result into the more general IdeResult type that supports producing
Expand All @@ -163,7 +162,8 @@ toIdeResult = either (, Nothing) (([],) . Just)
-- | Get all transitive file dependencies of a given module.
-- Does not include the file itself.
getDependencies :: NormalizedFilePath -> Action (Maybe [NormalizedFilePath])
getDependencies file = fmap transitiveModuleDeps <$> use GetDependencies file
getDependencies file =
fmap transitiveModuleDeps . (`transitiveDeps` file) <$> use_ GetDependencyInformation file

getSourceFileSource :: NormalizedFilePath -> Action BS.ByteString
getSourceFileSource nfp = do
Expand Down Expand Up @@ -334,7 +334,7 @@ getLocatedImportsRule =
return $ if itExists then Just nfp' else Nothing
| Just tt <- HM.lookup (TargetModule modName) targets = do
-- reuse the existing NormalizedFilePath in order to maximize sharing
let ttmap = HM.mapWithKey const (HS.toMap tt)
let ttmap = HM.mapWithKey const (HashSet.toMap tt)
nfp' = HM.lookupDefault nfp nfp ttmap
itExists <- getFileExists nfp'
return $ if itExists then Just nfp' else Nothing
Expand Down Expand Up @@ -492,18 +492,6 @@ reportImportCyclesRule =
pure (moduleNameString . moduleName . ms_mod $ ms)
showCycle mods = T.intercalate ", " (map T.pack mods)

-- returns all transitive dependencies in topological order.
-- NOTE: result does not include the argument file.
getDependenciesRule :: Rules ()
getDependenciesRule =
defineEarlyCutoff $ RuleNoDiagnostics $ \GetDependencies file -> do
depInfo <- use_ GetDependencyInformation file
let allFiles = reachableModules depInfo
_ <- uses_ ReportImportCycles allFiles
opts <- getIdeOptions
let mbFingerprints = map (Util.fingerprintString . fromNormalizedFilePath) allFiles <$ optShakeFiles opts
return (fingerprintToBS . Util.fingerprintFingerprints <$> mbFingerprints, transitiveDeps depInfo file)

getHieAstsRule :: Rules ()
getHieAstsRule =
define $ \GetHieAst f -> do
Expand Down Expand Up @@ -654,8 +642,8 @@ currentLinkables = do
where
go (mod, time) = LM time mod []

loadGhcSession :: Rules ()
loadGhcSession = do
loadGhcSession :: GhcSessionDepsConfig -> Rules ()
loadGhcSession ghcSessionDepsConfig = do
-- This function should always be rerun because it tracks changes
-- to the version of the collection of HscEnv's.
defineEarlyCutOffNoFile $ \GhcSessionIO -> do
Expand Down Expand Up @@ -691,49 +679,65 @@ loadGhcSession = do
Nothing -> LBS.toStrict $ B.encode (hash (snd val))
return (Just cutoffHash, val)

define $ \GhcSessionDeps file -> ghcSessionDepsDefinition file

ghcSessionDepsDefinition :: NormalizedFilePath -> Action (IdeResult HscEnvEq)
ghcSessionDepsDefinition file = do
defineNoDiagnostics $ \GhcSessionDeps file -> do
env <- use_ GhcSession file
let hsc = hscEnv env
ms <- msrModSummary <$> use_ GetModSummaryWithoutTimestamps file
deps <- use_ GetDependencies file
let tdeps = transitiveModuleDeps deps
uses_th_qq =
xopt LangExt.TemplateHaskell dflags || xopt LangExt.QuasiQuotes dflags
dflags = ms_hspp_opts ms
ifaces <- if uses_th_qq
then uses_ GetModIface tdeps
else uses_ GetModIfaceWithoutLinkable tdeps

-- Currently GetDependencies returns things in topological order so A comes before B if A imports B.
-- We need to reverse this as GHC gets very unhappy otherwise and complains about broken interfaces.
-- Long-term we might just want to change the order returned by GetDependencies
let inLoadOrder = reverse (map hirHomeMod ifaces)

session' <- liftIO $ loadModulesHome inLoadOrder <$> setupFinderCache (map hirModSummary ifaces) hsc

res <- liftIO $ newHscEnvEqWithImportPaths (envImportPaths env) session' []
return ([], Just res)
ghcSessionDepsDefinition ghcSessionDepsConfig env file

data GhcSessionDepsConfig = GhcSessionDepsConfig
{ checkForImportCycles :: Bool
, forceLinkables :: Bool
, fullModSummary :: Bool
}
instance Default GhcSessionDepsConfig where
def = GhcSessionDepsConfig
{ checkForImportCycles = True
, forceLinkables = False
, fullModSummary = False
}

ghcSessionDepsDefinition :: GhcSessionDepsConfig -> HscEnvEq -> NormalizedFilePath -> Action (Maybe HscEnvEq)
ghcSessionDepsDefinition GhcSessionDepsConfig{..} env file = do
let hsc = hscEnv env

mbdeps <- mapM(fmap artifactFilePath . snd) <$> use_ GetLocatedImports file
case mbdeps of
Nothing -> return Nothing
Just deps -> do
when checkForImportCycles $ void $ uses_ ReportImportCycles deps
ms:mss <- map msrModSummary <$> if fullModSummary
then uses_ GetModSummary (file:deps)
else uses_ GetModSummaryWithoutTimestamps (file:deps)

depSessions <- map hscEnv <$> uses_ GhcSessionDeps deps
let uses_th_qq =
xopt LangExt.TemplateHaskell dflags || xopt LangExt.QuasiQuotes dflags
dflags = ms_hspp_opts ms
ifaces <- if uses_th_qq || forceLinkables
then uses_ GetModIface deps
else uses_ GetModIfaceWithoutLinkable deps

let inLoadOrder = map hirHomeMod ifaces
session' <- liftIO $ mergeEnvs hsc mss inLoadOrder depSessions

Just <$> liftIO (newHscEnvEqWithImportPaths (envImportPaths env) session' [])

-- | Load a iface from disk, or generate it if there isn't one or it is out of date
-- This rule also ensures that the `.hie` and `.o` (if needed) files are written out.
getModIfaceFromDiskRule :: Rules ()
getModIfaceFromDiskRule = defineEarlyCutoff $ Rule $ \GetModIfaceFromDisk f -> do
ms <- msrModSummary <$> use_ GetModSummary f
(diags_session, mb_session) <- ghcSessionDepsDefinition f
mb_session <- use GhcSessionDeps f
case mb_session of
Nothing -> return (Nothing, (diags_session, Nothing))
Nothing -> return (Nothing, ([], Nothing))
Just session -> do
sourceModified <- use_ IsHiFileStable f
linkableType <- getLinkableType f
r <- loadInterface (hscEnv session) ms sourceModified linkableType (regenerateHiFile session f ms)
case r of
(diags, Nothing) -> return (Nothing, (diags ++ diags_session, Nothing))
(diags, Nothing) -> return (Nothing, (diags, Nothing))
(diags, Just x) -> do
let !fp = Just $! hiFileFingerPrint x
return (fp, (diags <> diags_session, Just x))
return (fp, (diags, Just x))

-- | Check state of hiedb after loading an iface from disk - have we indexed the corresponding `.hie` file?
-- This function is responsible for ensuring database consistency
Expand Down Expand Up @@ -1055,20 +1059,28 @@ writeHiFileAction hsc hiFile = do
resetInterfaceStore extras $ toNormalizedFilePath' targetPath
writeHiFile hsc hiFile

data RulesConfig = RulesConfig
{ -- | Disable import cycle checking for improved performance in large codebases
checkForImportCycles :: Bool
-- | Disable TH for improved performance in large codebases
, enableTemplateHaskell :: Bool
}

instance Default RulesConfig where def = RulesConfig True True

-- | A rule that wires per-file rules together
mainRule :: Rules ()
mainRule = do
mainRule :: RulesConfig -> Rules ()
mainRule RulesConfig{..} = do
linkables <- liftIO $ newVar emptyModuleEnv
addIdeGlobal $ CompiledLinkables linkables
getParsedModuleRule
getParsedModuleWithCommentsRule
getLocatedImportsRule
getDependencyInformationRule
reportImportCyclesRule
getDependenciesRule
typeCheckRule
getDocMapRule
loadGhcSession
loadGhcSession def{checkForImportCycles}
getModIfaceFromDiskRule
getModIfaceFromDiskAndIndexRule
getModIfaceRule
Expand All @@ -1086,8 +1098,10 @@ mainRule = do
-- * ObjectLinkable -> BCOLinkable : the prev linkable can be reused, signal "no change"
-- * Object/BCO -> NoLinkable : the prev linkable can be ignored, signal "no change"
-- * otherwise : the prev linkable cannot be reused, signal "value has changed"
defineEarlyCutoff $ RuleWithCustomNewnessCheck (<=) $ \NeedsCompilation file ->
needsCompilationRule file
if enableTemplateHaskell
then defineEarlyCutoff $ RuleWithCustomNewnessCheck (<=) $ \NeedsCompilation file ->
needsCompilationRule file
else defineNoDiagnostics $ \NeedsCompilation _ -> return $ Just Nothing
generateCoreRule
getImportMapRule
getAnnotatedParsedSourceRule
Expand Down
2 changes: 2 additions & 0 deletions ghcide/src/Development/IDE/GHC/Compat/Units.hs
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ module Development.IDE.GHC.Compat.Units (
ExternalPackageState(..),
-- * Utils
filterInplaceUnits,
FinderCache,
) where

#if MIN_VERSION_ghc(9,0,0)
Expand All @@ -53,6 +54,7 @@ import qualified GHC.Data.ShortText as ST
import GHC.Driver.Env (hsc_unit_dbs)
import GHC.Unit.Env
import GHC.Unit.External
import GHC.Unit.Finder
#else
import GHC.Driver.Types
#endif
Expand Down
Loading

0 comments on commit 9514622

Please sign in to comment.