Permalink
Browse files

ticky enhancements

  * the new StgCmmArgRep module breaks a dependency cycle; I also
    untabified it, but made no real changes

  * updated the documentation in the wiki and changed the user guide to
    point there

  * moved the allocation enters for ticky and CCS to after the heap check

    * I left LDV where it was, which was before the heap check at least
      once, since I have no idea what it is

  * standardized all (active?) ticky alloc totals to bytes

  * in order to avoid double counting StgCmmLayout.adjustHpBackwards
    no longer bumps ALLOC_HEAP_ctr

  * I resurrected the SLOW_CALL counters

    * the new module StgCmmArgRep breaks cyclic dependency between
      Layout and Ticky (which the SLOW_CALL counters cause)

    * renamed them SLOW_CALL_fast_<pattern> and VERY_SLOW_CALL

  * added ALLOC_RTS_ctr and _tot ticky counters

    * e.g. allocation by Storage.c:allocate or a BUILD_PAP in stg_ap_*_info

    * resurrected ticky counters for ALLOC_THK, ALLOC_PAP, and
      ALLOC_PRIM

    * added -ticky and -DTICKY_TICKY in ways.mk for debug ways

  * added a ticky counter for total LNE entries

  * new flags for ticky: -ticky-allocd -ticky-dyn-thunk -ticky-LNE

    * all off by default

    * -ticky-allocd: tracks allocation *of* a closure in addition to
      allocation *by* that closure

    * -ticky-dyn-thunk tracks dynamic thunks as if they were functions

    * -ticky-LNE tracks LNEs as if they were functions

  * updated the ticky report format, including making the argument
    categories (more?) accurate again

  * the printed name for things in the report includes the unique of
    their ticky parent and indicates if they are not top-level
  • Loading branch information...
1 parent c7d80c6 commit 460abd75c4f99d813ed226d2ff6aa592d62fafd4 Nicolas Frisby committed Mar 6, 2013
View
@@ -61,7 +61,7 @@ module CLabel (
mkCAFBlackHoleInfoTableLabel,
mkCAFBlackHoleEntryLabel,
mkRtsPrimOpLabel,
- mkRtsSlowTickyCtrLabel,
+ mkRtsSlowFastTickyCtrLabel,
mkSelectorInfoLabel,
mkSelectorEntryLabel,
@@ -99,7 +99,7 @@ module CLabel (
isCFunctionLabel, isGcPtrLabel, labelDynamic,
-- * Conversions
- toClosureLbl, toSlowEntryLbl, toEntryLbl, toInfoLbl, toRednCountsLbl,
+ toClosureLbl, toSlowEntryLbl, toEntryLbl, toInfoLbl, toRednCountsLbl, hasHaskellName,
pprCLabel
) where
@@ -313,7 +313,7 @@ data RtsLabelInfo
| RtsPrimOp PrimOp
| RtsApFast FastString -- ^ _fast versions of generic apply
- | RtsSlowTickyCtr String
+ | RtsSlowFastTickyCtr String
deriving (Eq, Ord)
-- NOTE: Eq on LitString compares the pointer only, so this isn't
@@ -356,9 +356,10 @@ mkTopSRTLabel :: Unique -> CLabel
mkTopSRTLabel u = SRTLabel u
mkSRTLabel :: Name -> CafInfo -> CLabel
-mkRednCountsLabel :: Name -> CafInfo -> CLabel
+mkRednCountsLabel :: Name -> CLabel
mkSRTLabel name c = IdLabel name c SRT
-mkRednCountsLabel name c = IdLabel name c RednCounts
+mkRednCountsLabel name =
+ IdLabel name NoCafRefs RednCounts -- Note [ticky for LNE]
-- These have local & (possibly) external variants:
mkLocalClosureLabel :: Name -> CafInfo -> CLabel
@@ -503,8 +504,8 @@ mkCCSLabel ccs = CCS_Label ccs
mkRtsApFastLabel :: FastString -> CLabel
mkRtsApFastLabel str = RtsLabel (RtsApFast str)
-mkRtsSlowTickyCtrLabel :: String -> CLabel
-mkRtsSlowTickyCtrLabel pat = RtsLabel (RtsSlowTickyCtr pat)
+mkRtsSlowFastTickyCtrLabel :: String -> CLabel
+mkRtsSlowFastTickyCtrLabel pat = RtsLabel (RtsSlowFastTickyCtr pat)
-- Constructing Code Coverage Labels
@@ -549,10 +550,6 @@ toSlowEntryLbl :: CLabel -> CLabel
toSlowEntryLbl (IdLabel n c _) = IdLabel n c Slow
toSlowEntryLbl l = pprPanic "toSlowEntryLbl" (ppr l)
-toRednCountsLbl :: CLabel -> CLabel
-toRednCountsLbl (IdLabel n c _) = IdLabel n c RednCounts
-toRednCountsLbl l = pprPanic "toRednCountsLbl" (ppr l)
-
toEntryLbl :: CLabel -> CLabel
toEntryLbl (IdLabel n c LocalInfoTable) = IdLabel n c LocalEntry
toEntryLbl (IdLabel n c ConInfoTable) = IdLabel n c ConEntry
@@ -574,12 +571,38 @@ toInfoLbl (CmmLabel m str CmmEntry) = CmmLabel m str CmmInfo
toInfoLbl (CmmLabel m str CmmRet) = CmmLabel m str CmmRetInfo
toInfoLbl l = pprPanic "CLabel.toInfoLbl" (ppr l)
+toRednCountsLbl :: CLabel -> Maybe CLabel
+toRednCountsLbl = fmap mkRednCountsLabel . hasHaskellName
+
+hasHaskellName :: CLabel -> Maybe Name
+hasHaskellName (IdLabel n _ _) = Just n
+hasHaskellName _ = Nothing
+
-- -----------------------------------------------------------------------------
--- Does a CLabel refer to a CAF?
+-- Does a CLabel's referent itself refer to a CAF?
hasCAF :: CLabel -> Bool
+hasCAF (IdLabel _ _ RednCounts) = False -- Note [ticky for LNE]
hasCAF (IdLabel _ MayHaveCafRefs _) = True
hasCAF _ = False
+-- Note [ticky for LNE]
+-- ~~~~~~~~~~~~~~~~~~~~~
+
+-- Until 14 Feb 2013, every ticky counter was associated with a
+-- closure. Thus, ticky labels used IdLabel. It is odd that
+-- CmmBuildInfoTables.cafTransfers would consider such a ticky label
+-- reason to add the name to the CAFEnv (and thus eventually the SRT),
+-- but it was harmless because the ticky was only used if the closure
+-- was also.
+--
+-- Since we now have ticky counters for LNEs, it is no longer the case
+-- that every ticky counter has an actual closure. So I changed the
+-- generation of ticky counters' CLabels to not result in their
+-- associated id ending up in the SRT.
+--
+-- NB IdLabel is still appropriate for ticky ids (as opposed to
+-- CmmLabel) because the LNE's counter is still related to an .hs Id,
+-- that Id just isn't for a proper closure.
-- -----------------------------------------------------------------------------
-- Does a CLabel need declaring before use or not?
@@ -1051,8 +1074,8 @@ pprCLbl (CmmLabel _ fs CmmClosure)
pprCLbl (RtsLabel (RtsPrimOp primop))
= ptext (sLit "stg_") <> ppr primop
-pprCLbl (RtsLabel (RtsSlowTickyCtr pat))
- = ptext (sLit "SLOW_CALL_") <> text pat <> ptext (sLit "_ctr")
+pprCLbl (RtsLabel (RtsSlowFastTickyCtr pat))
+ = ptext (sLit "SLOW_CALL_fast_") <> text pat <> ptext (sLit "_ctr")
pprCLbl (ForeignLabel str _ _ _)
= ftext str
View
@@ -15,6 +15,7 @@ module CmmType
, rEP_CostCentreStack_mem_alloc
, rEP_CostCentreStack_scc_count
, rEP_StgEntCounter_allocs
+ , rEP_StgEntCounter_allocd
, ForeignHint(..)
@@ -337,6 +338,11 @@ rEP_StgEntCounter_allocs dflags
= cmmBits (widthFromBytes (pc_REP_StgEntCounter_allocs pc))
where pc = sPlatformConstants (settings dflags)
+rEP_StgEntCounter_allocd :: DynFlags -> CmmType
+rEP_StgEntCounter_allocd dflags
+ = cmmBits (widthFromBytes (pc_REP_StgEntCounter_allocd pc))
+ where pc = sPlatformConstants (settings dflags)
+
-------------------------------------------------------------------------
{- Note [Signed vs unsigned]
~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -0,0 +1,135 @@
+-----------------------------------------------------------------------------
+--
+-- Argument representations used in StgCmmLayout.
+--
+-- (c) The University of Glasgow 2013
+--
+-----------------------------------------------------------------------------
+
+module StgCmmArgRep (
+ ArgRep(..), toArgRep, argRepSizeW,
+
+ argRepString, isNonV, idArgRep,
+
+ slowCallPattern,
+
+ ) where
+
+import StgCmmClosure ( idPrimRep )
+
+import SMRep ( WordOff )
+import Id ( Id )
+import TyCon ( PrimRep(..), primElemRepSizeB )
+import BasicTypes ( RepArity )
+import Constants ( wORD64_SIZE )
+import DynFlags
+
+import Outputable
+import FastString
+
+-- I extricated this code as this new module in order to avoid a
+-- cyclic dependency between StgCmmLayout and StgCmmTicky.
+--
+-- NSF 18 Feb 2013
+
+-------------------------------------------------------------------------
+-- Classifying arguments: ArgRep
+-------------------------------------------------------------------------
+
+-- ArgRep is re-exported by StgCmmLayout, but only for use in the
+-- byte-code generator which also needs to know about the
+-- classification of arguments.
+
+data ArgRep = P -- GC Ptr
+ | N -- Word-sized non-ptr
+ | L -- 64-bit non-ptr (long)
+ | V -- Void
+ | F -- Float
+ | D -- Double
+ | V16 -- 16-byte (128-bit) vectors of Float/Double/Int8/Word32/etc.
+instance Outputable ArgRep where ppr = text . argRepString
+
+argRepString :: ArgRep -> String
+argRepString P = "P"
+argRepString N = "N"
+argRepString L = "L"
+argRepString V = "V"
+argRepString F = "F"
+argRepString D = "D"
+argRepString V16 = "V16"
+
+toArgRep :: PrimRep -> ArgRep
+toArgRep VoidRep = V
+toArgRep PtrRep = P
+toArgRep IntRep = N
+toArgRep WordRep = N
+toArgRep AddrRep = N
+toArgRep Int64Rep = L
+toArgRep Word64Rep = L
+toArgRep FloatRep = F
+toArgRep DoubleRep = D
+toArgRep (VecRep len elem)
+ | len*primElemRepSizeB elem == 16 = V16
+ | otherwise = error "toArgRep: bad vector primrep"
+
+isNonV :: ArgRep -> Bool
+isNonV V = False
+isNonV _ = True
+
+argRepSizeW :: DynFlags -> ArgRep -> WordOff -- Size in words
+argRepSizeW _ N = 1
+argRepSizeW _ P = 1
+argRepSizeW _ F = 1
+argRepSizeW dflags L = wORD64_SIZE `quot` wORD_SIZE dflags
+argRepSizeW dflags D = dOUBLE_SIZE dflags `quot` wORD_SIZE dflags
+argRepSizeW _ V = 0
+argRepSizeW dflags V16 = 16 `quot` wORD_SIZE dflags
+
+idArgRep :: Id -> ArgRep
+idArgRep = toArgRep . idPrimRep
+
+-- This list of argument patterns should be kept in sync with at least
+-- the following:
+--
+-- * StgCmmLayout.stdPattern maybe to some degree?
+--
+-- * the RTS_RET(stg_ap_*) and RTS_FUN_DECL(stg_ap_*_fast)
+-- declarations in includes/stg/MiscClosures.h
+--
+-- * the SLOW_CALL_*_ctr declarations in includes/stg/Ticky.h,
+--
+-- * the TICK_SLOW_CALL_*() #defines in includes/Cmm.h,
+--
+-- * the PR_CTR(SLOW_CALL_*_ctr) calls in rts/Ticky.c,
+--
+-- * and the SymI_HasProto(stg_ap_*_{ret,info,fast}) calls and
+-- SymI_HasProto(SLOW_CALL_*_ctr) calls in rts/Linker.c
+--
+-- There may be more places that I haven't found; I merely igrep'd for
+-- pppppp and excluded things that seemed ghci-specific.
+--
+-- Also, it seems at the moment that ticky counters with void
+-- arguments will never be bumped, but I'm still declaring those
+-- counters, defensively.
+--
+-- NSF 6 Mar 2013
+
+-- These cases were found to cover about 99% of all slow calls:
+slowCallPattern :: [ArgRep] -> (FastString, RepArity)
+-- Returns the generic apply function and arity
+slowCallPattern (P: P: P: P: P: P: _) = (fsLit "stg_ap_pppppp", 6)
+slowCallPattern (P: P: P: P: P: _) = (fsLit "stg_ap_ppppp", 5)
+slowCallPattern (P: P: P: P: _) = (fsLit "stg_ap_pppp", 4)
+slowCallPattern (P: P: P: V: _) = (fsLit "stg_ap_pppv", 4)
+slowCallPattern (P: P: P: _) = (fsLit "stg_ap_ppp", 3)
+slowCallPattern (P: P: V: _) = (fsLit "stg_ap_ppv", 3)
+slowCallPattern (P: P: _) = (fsLit "stg_ap_pp", 2)
+slowCallPattern (P: V: _) = (fsLit "stg_ap_pv", 2)
+slowCallPattern (P: _) = (fsLit "stg_ap_p", 1)
+slowCallPattern (V: _) = (fsLit "stg_ap_v", 1)
+slowCallPattern (N: _) = (fsLit "stg_ap_n", 1)
+slowCallPattern (F: _) = (fsLit "stg_ap_f", 1)
+slowCallPattern (D: _) = (fsLit "stg_ap_d", 1)
+slowCallPattern (L: _) = (fsLit "stg_ap_l", 1)
+slowCallPattern (V16: _) = (fsLit "stg_ap_v16", 1)
+slowCallPattern [] = (fsLit "stg_ap_0", 0)
@@ -296,7 +296,7 @@ mkRhsClosure dflags bndr _cc _bi
(StgApp fun_id args)
| args `lengthIs` (arity-1)
- && all (isGcPtrRep . idPrimRep . stripNV) fvs
+ && all (isGcPtrRep . idPrimRep . unsafe_stripNV) fvs
&& isUpdatable upd_flag
&& arity <= mAX_SPEC_AP_SIZE dflags
&& not (gopt Opt_SccProfilingOn dflags)
@@ -344,7 +344,7 @@ mkRhsClosure _ bndr cc _ fvs upd_flag args body
fv_details :: [(NonVoid Id, VirtualHpOffset)]
(tot_wds, ptr_wds, fv_details)
= mkVirtHeapOffsets dflags (isLFThunk lf_info)
- (addIdReps (map stripNV reduced_fvs))
+ (addIdReps (map unsafe_stripNV reduced_fvs))
closure_info = mkClosureInfo dflags False -- Not static
bndr lf_info tot_wds ptr_wds
descr
@@ -369,11 +369,6 @@ mkRhsClosure _ bndr cc _ fvs upd_flag args body
-- RETURN
; return (mkRhsInit dflags reg lf_info hp_plus_n) }
-
--- Use with care; if used inappropriately, it could break invariants.
-stripNV :: NonVoid a -> a
-stripNV (NonVoid a) = a
-
-------------------------
cgRhsStdThunk
:: Id
@@ -418,10 +413,10 @@ mkClosureLFInfo :: Id -- The binder
-> [Id] -- Args
-> FCode LambdaFormInfo
mkClosureLFInfo bndr top fvs upd_flag args
- | null args = return (mkLFThunk (idType bndr) top (map stripNV fvs) upd_flag)
+ | null args = return (mkLFThunk (idType bndr) top (map unsafe_stripNV fvs) upd_flag)
| otherwise =
do { arg_descr <- mkArgDescr (idName bndr) args
- ; return (mkLFReEntrant top (map stripNV fvs) args arg_descr) }
+ ; return (mkLFReEntrant top (map unsafe_stripNV fvs) args arg_descr) }
------------------------------------------------------------------------
@@ -453,20 +448,16 @@ closureCodeBody :: Bool -- whether this is a top-level binding
closureCodeBody top_lvl bndr cl_info cc _args arity body fv_details
| arity == 0 -- No args i.e. thunk
- = emitClosureProcAndInfoTable top_lvl bndr lf_info info_tbl [] $
+ = withNewTickyCounterThunk cl_info $
+ emitClosureProcAndInfoTable top_lvl bndr lf_info info_tbl [] $
\(_, node, _) -> thunkCode cl_info fv_details cc node arity body
where
lf_info = closureLFInfo cl_info
info_tbl = mkCmmInfo cl_info
closureCodeBody top_lvl bndr cl_info cc args arity body fv_details
= -- Note: args may be [], if all args are Void
- do { -- Allocate the global ticky counter,
- -- and establish the ticky-counter
- -- label for this block
- let ticky_ctr_lbl = closureRednCountsLabel cl_info
- ; emitTickyCounter cl_info (map stripNV args)
- ; setTickyCtrLabel ticky_ctr_lbl $ do
+ withNewTickyCounterFun (closureName cl_info) args $ do {
; let
lf_info = closureLFInfo cl_info
@@ -479,20 +470,20 @@ closureCodeBody top_lvl bndr cl_info cc args arity body fv_details
{ mkSlowEntryCode bndr cl_info arg_regs
; dflags <- getDynFlags
- ; let lf_info = closureLFInfo cl_info
- node_points = nodeMustPointToIt dflags lf_info
+ ; let node_points = nodeMustPointToIt dflags lf_info
node' = if node_points then Just node else Nothing
- ; tickyEnterFun cl_info
- ; enterCostCentreFun cc
- (CmmMachOp (mo_wordSub dflags)
- [ CmmReg nodeReg
- , mkIntExpr dflags (funTag dflags cl_info) ])
; when node_points (ldvEnterClosure cl_info)
; granYield arg_regs node_points
-- Main payload
; entryHeapCheck cl_info node' arity arg_regs $ do
- { fv_bindings <- mapM bind_fv fv_details
+ { -- ticky after heap check to avoid double counting
+ tickyEnterFun cl_info
+ ; enterCostCentreFun cc
+ (CmmMachOp (mo_wordSub dflags)
+ [ CmmReg nodeReg
+ , mkIntExpr dflags (funTag dflags cl_info) ])
+ ; fv_bindings <- mapM bind_fv fv_details
-- Load free vars out of closure *after*
-- heap check, to reduce live vars over check
; when node_points $ load_fvs node lf_info fv_bindings
@@ -545,7 +536,6 @@ thunkCode cl_info fv_details _cc node arity body
= do { dflags <- getDynFlags
; let node_points = nodeMustPointToIt dflags (closureLFInfo cl_info)
node' = if node_points then Just node else Nothing
- ; tickyEnterThunk cl_info
; ldvEnterClosure cl_info -- NB: Node always points when profiling
; granThunk node_points
@@ -562,7 +552,8 @@ thunkCode cl_info fv_details _cc node arity body
-- that cc of enclosing scope will be recorded
-- in update frame CAF/DICT functions will be
-- subsumed by this enclosing cc
- do { enterCostCentreThunk (CmmReg nodeReg)
+ do { tickyEnterThunk cl_info
+ ; enterCostCentreThunk (CmmReg nodeReg)
; let lf_info = closureLFInfo cl_info
; fv_bindings <- mapM bind_fv fv_details
; load_fvs node lf_info fv_bindings
Oops, something went wrong.

0 comments on commit 460abd7

Please sign in to comment.