[project @ 2001-11-22 14:25:11 by simonmar]

Retainer Profiling / Lag-drag-void profiling.

This is mostly work by Sungwoo Park, who spent a summer internship at
MSR Cambridge this year implementing these two types of heap profiling
in GHC.

Relative to Sungwoo's original work, I've made some improvements to
the code:

   - it's now possible to apply constraints to retainer and LDV profiles
     in the same way as we do for other types of heap profile (e.g.
     +RTS -hc{foo,bar} -hR -RTS gives you a retainer profile considering
     only closures with cost centres 'foo' and 'bar').

   - the heap-profile timer implementation is cleaned up.

   - heap profiling no longer has to be run in a two-space heap.

   - general cleanup of the code and application of the SDM C coding
     style guidelines.

Profiling will be a little slower and require more space than before,
mainly because closures have an extra header word to support either
retainer profiling or LDV profiling (you can't do both at the same
time).

We've used the new profiling tools on GHC itself, with moderate
success.  Fixes for some space leaks in GHC to follow...
commit db61851c5472bf565cd1da900b33d6e033fd743d 1 parent a88cde3
simonmar authored
Showing with 5,198 additions and 417 deletions.
  1. +35 −3 ghc/includes/ClosureMacros.h
  2. +6 −2 ghc/includes/Closures.h
  3. +3 −1 ghc/includes/Stg.h
  4. +132 −0 ghc/includes/StgLdvProf.h
  5. +25 −8 ghc/includes/StgMacros.h
  6. +1 −4 ghc/includes/StgProf.h
  7. +75 −0 ghc/includes/StgRetainerProf.h
  8. +4 −2 ghc/includes/Updates.h
  9. +14 −4 ghc/rts/Exception.hc
  10. +83 −6 ghc/rts/GC.c
  11. +1 −2  ghc/rts/HeapStackCheck.hc
  12. +9 −1 ghc/rts/Itimer.c
  13. +2 −7 ghc/rts/Itimer.h
  14. +857 −0 ghc/rts/LdvProfile.c
  15. +63 −0 ghc/rts/LdvProfile.h
  16. +19 −1 ghc/rts/PrimOps.hc
  17. +236 −213 ghc/rts/ProfHeap.c
  18. +5 −4 ghc/rts/ProfHeap.h
  19. +35 −39 ghc/rts/Profiling.c
  20. +7 −1 ghc/rts/Profiling.h
  21. +54 −8 ghc/rts/Proftimer.c
  22. +9 −7 ghc/rts/Proftimer.h
  23. +2,303 −0 ghc/rts/RetainerProfile.c
  24. +29 −0 ghc/rts/RetainerProfile.h
  25. +587 −0 ghc/rts/RetainerSet.c
  26. +139 −0 ghc/rts/RetainerSet.h
  27. +53 −17 ghc/rts/RtsFlags.c
  28. +38 −6 ghc/rts/RtsStartup.c
  29. +54 −7 ghc/rts/Schedule.c
  30. +10 −1 ghc/rts/Schedule.h
  31. +103 −13 ghc/rts/Stats.c
  32. +16 −1 ghc/rts/Stats.h
  33. +57 −27 ghc/rts/StgMiscClosures.hc
  34. +3 −3 ghc/rts/StgStartup.hc
  35. +12 −2 ghc/rts/StgStdThunks.hc
  36. +31 −19 ghc/rts/Storage.c
  37. +53 −2 ghc/rts/Storage.h
  38. +14 −4 ghc/rts/Updates.hc
  39. +21 −2 ghc/rts/Weak.c
38 ghc/includes/ClosureMacros.h
@@ -1,5 +1,5 @@
/* ----------------------------------------------------------------------------
- * $Id: ClosureMacros.h,v 1.32 2001/02/06 11:41:04 rrt Exp $
+ * $Id: ClosureMacros.h,v 1.33 2001/11/22 14:25:11 simonmar Exp $
*
* (c) The GHC Team, 1998-1999
*
@@ -79,8 +79,39 @@ static __inline__ StgFunPtr get_entry(const StgInfoTable *itbl) {
-------------------------------------------------------------------------- */
#ifdef PROFILING
-#define SET_PROF_HDR(c,ccs_) (c)->header.prof.ccs = ccs_
-#define SET_STATIC_PROF_HDR(ccs_) prof : { ccs : ccs_ },
+#ifdef DEBUG_RETAINER
+/*
+ For the sake of debugging, we take the safest way for the moment. Actually, this
+ is useful to check the sanity of the heap before retainer profiling begins.
+ flip is defined in RetainerProfile.c, and declared as extern in RetainerProfile.h.
+ Note: the functions that build Haskell objects from C datatypes, i.e.,
+ all the rts_mk???() functions in RtsAPI.c, must be changed as well.
+ */
+extern StgWord flip;
+#define SET_PROF_HDR(c,ccs_) \
+ ((c)->header.prof.ccs = ccs_, (c)->header.prof.hp.rs = (retainerSet *)((StgWord)NULL | flip))
+#else
+/*
+ For retainer profiling only: we do not have to set (c)->header.prof.hp.rs to
+ NULL | flip (flip is defined in RetainerProfile.c) because even when flip
+ is 1, rs is invalid and will be initialized to NULL | flip later when
+ the closure *c is visited.
+ */
+/*
+#define SET_PROF_HDR(c,ccs_) \
+ ((c)->header.prof.ccs = ccs_, (c)->header.prof.hp.rs = NULL)
+ */
+/*
+ The following macro works for both retainer profiling and LDV profiling:
+ for retainer profiling, ldvTime remains 0, so rs fields are initialized to 0.
+ See the invariants on ldvTime.
+ */
+#define SET_PROF_HDR(c,ccs_) \
+ ((c)->header.prof.ccs = ccs_, \
+ LDV_recordCreate((c)))
+#endif // DEBUG_RETAINER
+#define SET_STATIC_PROF_HDR(ccs_) \
+ prof : { ccs : ccs_, hp : { rs : NULL } },
#else
#define SET_PROF_HDR(c,ccs)
#define SET_STATIC_PROF_HDR(ccs)
@@ -109,6 +140,7 @@ static __inline__ StgFunPtr get_entry(const StgInfoTable *itbl) {
#define SET_TICKY_HDR(c,stuff)
#define SET_STATIC_TICKY_HDR(stuff)
#endif
+
#define SET_HDR(c,info,ccs) \
{ \
SET_INFO(c,info); \
8 ghc/includes/Closures.h
@@ -1,5 +1,5 @@
/* ----------------------------------------------------------------------------
- * $Id: Closures.h,v 1.28 2001/10/03 13:57:42 simonmar Exp $
+ * $Id: Closures.h,v 1.29 2001/11/22 14:25:11 simonmar Exp $
*
* (c) The GHC Team, 1998-1999
*
@@ -20,7 +20,11 @@
-------------------------------------------------------------------------- */
typedef struct {
- CostCentreStack *ccs;
+ CostCentreStack *ccs;
+ union {
+ RetainerSet *rs; // Retainer Set
+ StgWord ldvw; // Lag/Drag/Void Word
+ } hp;
} StgProfHeader;
/* -----------------------------------------------------------------------------
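As the commit message says, this union is the one extra header word that makes profiled closures larger, and because the retainer-set pointer and the LDV word share the same storage, retainer and LDV profiling cannot both be active in one run. A minimal standalone sketch of that layout (not the RTS header itself; the opaque types and word-sized StgWord are stand-ins):

    #include <stdint.h>
    #include <stdio.h>

    typedef struct CostCentreStack CostCentreStack;   /* opaque stand-in */
    typedef struct RetainerSet     RetainerSet;       /* opaque stand-in */
    typedef uintptr_t              StgWord;           /* one machine word */

    typedef struct {
        CostCentreStack *ccs;      /* as before                         */
        union {
            RetainerSet *rs;       /* used by retainer profiling        */
            StgWord      ldvw;     /* used by lag/drag/void profiling   */
        } hp;                      /* the single extra word             */
    } StgProfHeader;

    int main(void) {
        printf("profiling header = %zu words\n",
               sizeof(StgProfHeader) / sizeof(StgWord));
        return 0;
    }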
4 ghc/includes/Stg.h
@@ -1,5 +1,5 @@
/* -----------------------------------------------------------------------------
- * $Id: Stg.h,v 1.39 2001/10/27 21:44:54 sof Exp $
+ * $Id: Stg.h,v 1.40 2001/11/22 14:25:11 simonmar Exp $
*
* (c) The GHC Team, 1998-1999
*
@@ -150,6 +150,8 @@ typedef StgWord64 LW_;
/* Profiling information */
#include "StgProf.h"
+#include "StgRetainerProf.h"
+#include "StgLdvProf.h"
/* Storage format definitions */
#include "Closures.h"
132 ghc/includes/StgLdvProf.h
@@ -0,0 +1,132 @@
+/* -----------------------------------------------------------------------------
+ * $Id: StgLdvProf.h,v 1.1 2001/11/22 14:25:11 simonmar Exp $
+ *
+ * (c) The GHC Team, 2001
+ * Author: Sungwoo Park
+ *
+ * Lag/Drag/Void profiling.
+ *
+ * ---------------------------------------------------------------------------*/
+
+#ifndef STGLDVPROF_H
+#define STGLDVPROF_H
+
+#ifdef PROFILING
+
+// Engine
+
+// declared in LdvProfile.c
+extern nat ldvTime;
+
+// LdvGenInfo stores the statistics for one specific census.
+typedef struct {
+ double time; // the MUT time at which the corresponding census is made
+
+ // We employ int instead of nat, for some values may be negative temporarily,
+ // e.g., dragNew.
+
+ // computed at each census
+ int inherentlyUsed; // total size of 'inherently used' closures
+ int notUsed; // total size of 'never used' closures
+ int used; // total size of 'used at least once' closures
+
+ /*
+ voidNew and dragNew are updated when a closure is destroyed.
+ For instance, when a 'never used' closure of size s and creation time
+ t is destroyed at time u, voidNew of eras t through u - 1 is increased
+ by s.
+ Likewise, when a 'used at least once' closure of size s and last use time
+ t is destroyed at time u, dragNew of eras t + 1 through u - 1 is increased
+ by s.
+ In our implementation, voidNew and dragNew are computed indirectly: instead
+ of updating voidNew or dragNew of all intervening eras, we update that
+ of the end two eras (one is increased and the other is decreased).
+ */
+ int voidNew; // current total size of 'destroyed without being used' closures
+ int dragNew; // current total size of 'used at least once and waiting to die'
+ // closures
+
+ // computed post-mortem
+ int voidTotal; // total size of closures in 'void' state
+ // lagTotal == notUsed - voidTotal // in 'lag' state
+ int dragTotal; // total size of closures in 'drag' state
+ // useTotal == used - dragTotal // in 'use' state
+} LdvGenInfo;
+
+extern LdvGenInfo *gi;
+
+// retrieves the LDV word from closure c
+#define LDVW(c) (((StgClosure *)(c))->header.prof.hp.ldvw)
+
+/*
+ An LDV word is divided into 3 parts: state bits (LDV_STATE_MASK), creation
+ time bits (LDV_CREATE_MASK), and last use time bits (LDV_LAST_MASK).
+ */
+#if SIZEOF_VOID_P == 8
+#define LDV_SHIFT 30
+#define LDV_STATE_MASK 0x1000000000000000
+#define LDV_CREATE_MASK 0x0FFFFFFFC0000000
+#define LDV_LAST_MASK 0x000000003FFFFFFF
+#define LDV_STATE_CREATE 0x0000000000000000
+#define LDV_STATE_USE 0x1000000000000000
+#else
+#define LDV_SHIFT 15
+#define LDV_STATE_MASK 0x40000000
+#define LDV_CREATE_MASK 0x3FFF8000
+#define LDV_LAST_MASK 0x00007FFF
+#define LDV_STATE_CREATE 0x00000000
+#define LDV_STATE_USE 0x40000000
+#endif // SIZEOF_VOID_P
+
+// Stores the creation time for closure c.
+// This macro is called at the very moment of closure creation.
+//
+// NOTE: this initializes LDVW(c) to zero, which ensures that there
+// is no conflict between retainer profiling and LDV profiling,
+// because retainer profiling also expects LDVW(c) to be initialised
+// to zero.
+#define LDV_recordCreate(c) \
+ LDVW((c)) = (ldvTime << LDV_SHIFT) | LDV_STATE_CREATE
+
+// Stores the last use time for closure c.
+// This macro *must* be called whenever a closure is used, that is, it is
+// entered.
+#define LDV_recordUse(c) \
+ { \
+ if (ldvTime > 0) \
+ LDVW((c)) = (LDVW((c)) & LDV_CREATE_MASK) | \
+ ldvTime | \
+ LDV_STATE_USE; \
+ }
+
+// Creates a 0-filled slop of size 'howManyBackwards' backwards from the
+// address 'from'.
+//
+// Invoked when:
+// 1) Hp is incremented and exceeds HpLim (in Updates.hc).
+// 2) copypart() is called (in GC.c).
+#define FILL_SLOP(from, howManyBackwards) \
+ if (ldvTime > 0) { \
+ int i; \
+ for (i = 0;i < (howManyBackwards); i++) \
+ ((StgWord *)(from))[-i] = 0; \
+ }
+
+// Informs the LDV profiler that closure c has just been evacuated.
+// Evacuated objects are no longer needed, so we just store the object's original size in
+// the LDV field.
+#define SET_EVACUAEE_FOR_LDV(c, size) \
+ LDVW((c)) = (size)
+
+// Macros called when a closure is entered.
+// The closure is not an 'inherently used' one.
+// The closure is not IND or IND_OLDGEN because neither is considered for LDV
+// profiling.
+#define LDV_ENTER(c) LDV_recordUse((c))
+
+#else // !PROFILING
+
+#define LDV_ENTER(c)
+
+#endif // PROFILING
+#endif // STGLDVPROF_H
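For illustration only, here is a small self-contained sketch of how a 32-bit LDV word (the SIZEOF_VOID_P == 4 case above) is packed and unpacked: the low 15 bits hold the last-use era, the next 15 bits the creation era, and bit 30 the create/use state. The helper function names are invented; the RTS itself only uses the LDV_recordCreate/LDV_recordUse macros above.

    #include <stdint.h>
    #include <stdio.h>

    #define LDV_SHIFT        15
    #define LDV_STATE_MASK   0x40000000u
    #define LDV_CREATE_MASK  0x3FFF8000u
    #define LDV_LAST_MASK    0x00007FFFu
    #define LDV_STATE_CREATE 0x00000000u
    #define LDV_STATE_USE    0x40000000u

    /* what LDV_recordCreate stores at creation time */
    static uint32_t ldv_create(uint32_t era) {
        return (era << LDV_SHIFT) | LDV_STATE_CREATE;
    }

    /* what LDV_recordUse stores when the closure is entered */
    static uint32_t ldv_use(uint32_t w, uint32_t era) {
        return (w & LDV_CREATE_MASK) | era | LDV_STATE_USE;
    }

    int main(void) {
        uint32_t w = ldv_create(3);   /* created in era 3, never used yet */
        w = ldv_use(w, 7);            /* first entered in era 7           */
        printf("created %u, last used %u, used=%d\n",
               (unsigned)((w & LDV_CREATE_MASK) >> LDV_SHIFT),
               (unsigned)(w & LDV_LAST_MASK),
               (w & LDV_STATE_MASK) == LDV_STATE_USE);
        return 0;
    }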
33 ghc/includes/StgMacros.h
@@ -1,5 +1,5 @@
/* -----------------------------------------------------------------------------
- * $Id: StgMacros.h,v 1.41 2001/11/08 16:37:54 simonmar Exp $
+ * $Id: StgMacros.h,v 1.42 2001/11/22 14:25:11 simonmar Exp $
*
* (c) The GHC Team, 1998-1999
*
@@ -144,7 +144,7 @@ static inline int IS_ARG_TAG( StgWord p ) { return p <= ARGTAG_MAX; }
tag_assts \
(r) = (P_)ret; \
JMP_(stg_chk_##layout); \
- }
+ }
#define HP_STK_CHK(stk_headroom,hp_headroom,ret,r,layout,tag_assts) \
DO_GRAN_ALLOCATE(hp_headroom) \
@@ -153,7 +153,7 @@ static inline int IS_ARG_TAG( StgWord p ) { return p <= ARGTAG_MAX; }
tag_assts \
(r) = (P_)ret; \
JMP_(stg_chk_##layout); \
- }
+ }
/* -----------------------------------------------------------------------------
A Heap Check in a case alternative are much simpler: everything is
@@ -186,7 +186,7 @@ static inline int IS_ARG_TAG( StgWord p ) { return p <= ARGTAG_MAX; }
HpAlloc = (headroom); \
tag_assts \
JMP_(stg_gc_enter_##ptrs); \
- }
+ }
#define HP_CHK_SEQ_NP(headroom,ptrs,tag_assts) \
DO_GRAN_ALLOCATE(headroom) \
@@ -194,7 +194,7 @@ static inline int IS_ARG_TAG( StgWord p ) { return p <= ARGTAG_MAX; }
HpAlloc = (headroom); \
tag_assts \
JMP_(stg_gc_seq_##ptrs); \
- }
+ }
#define HP_STK_CHK_NP(stk_headroom, hp_headroom, ptrs, tag_assts) \
DO_GRAN_ALLOCATE(hp_headroom) \
@@ -202,7 +202,7 @@ static inline int IS_ARG_TAG( StgWord p ) { return p <= ARGTAG_MAX; }
HpAlloc = (hp_headroom); \
tag_assts \
JMP_(stg_gc_enter_##ptrs); \
- }
+ }
/* Heap checks for branches of a primitive case / unboxed tuple return */
@@ -214,7 +214,7 @@ static inline int IS_ARG_TAG( StgWord p ) { return p <= ARGTAG_MAX; }
HpAlloc = (headroom); \
tag_assts \
JMP_(lbl); \
- }
+ }
#define HP_CHK_NOREGS(headroom,tag_assts) \
GEN_HP_CHK_ALT(headroom,stg_gc_noregs,tag_assts);
@@ -298,7 +298,7 @@ static inline int IS_ARG_TAG( StgWord p ) { return p <= ARGTAG_MAX; }
R9.w = (W_)LIVENESS_MASK(liveness); \
R10.w = (W_)reentry; \
JMP_(stg_gen_chk); \
- }
+ }
#define HP_CHK_GEN_TICKY(headroom,liveness,reentry,tag_assts) \
HP_CHK_GEN(headroom,liveness,reentry,tag_assts); \
@@ -435,12 +435,29 @@ EXTINFO_RTS(stg_gen_chk_info);
} \
SET_INFO(R1.cl,&stg_BLACKHOLE_info)
# else
+# ifndef PROFILING
# define UPD_BH_UPDATABLE(info) \
TICK_UPD_BH_UPDATABLE(); \
SET_INFO(R1.cl,&stg_BLACKHOLE_info)
# define UPD_BH_SINGLE_ENTRY(info) \
TICK_UPD_BH_SINGLE_ENTRY(); \
SET_INFO(R1.cl,&stg_SE_BLACKHOLE_info)
+# else
+// An object is replaced by a blackhole, so we fill the slop with zeros.
+//
+// Todo: maybe use SET_HDR() and remove LDV_recordCreate()?
+//
+# define UPD_BH_UPDATABLE(info) \
+ TICK_UPD_BH_UPDATABLE(); \
+ LDV_recordDead_FILL_SLOP_DYNAMIC(R1.cl); \
+ SET_INFO(R1.cl,&stg_BLACKHOLE_info); \
+ LDV_recordCreate(R1.cl)
+# define UPD_BH_SINGLE_ENTRY(info) \
+ TICK_UPD_BH_SINGLE_ENTRY(); \
+ LDV_recordDead_FILL_SLOP_DYNAMIC(R1.cl); \
+ SET_INFO(R1.cl,&stg_SE_BLACKHOLE_info) \
+ LDV_recordCreate(R1.cl)
+# endif /* PROFILING */
# endif
#else /* !EAGER_BLACKHOLING */
# define UPD_BH_UPDATABLE(thunk) /* nothing */
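The profiling variants of UPD_BH_UPDATABLE/UPD_BH_SINGLE_ENTRY above all follow the same bracketing pattern, which reappears in GC.c and PrimOps.hc whenever a closure is overwritten in place: declare the old contents dead (zeroing any slop), swap the info pointer, then re-register the closure as freshly created. A toy sketch of the idea, with an invented closure layout and helper names standing in for the real macros:

    typedef struct {
        const void   *info;   /* info pointer                 */
        unsigned long ldvw;   /* LDV word (profiling header)  */
    } Closure;

    /* stand-ins for LDV_recordDead_FILL_SLOP_DYNAMIC / LDV_recordCreate */
    static void ldv_record_dead_fill_slop(Closure *c) { (void)c; /* old closure dies, slop zeroed   */ }
    static void ldv_record_create(Closure *c)         { (void)c; /* stamp current era as birth time */ }

    /* overwrite a thunk with a black hole under LDV profiling */
    static void blackhole(Closure *c, const void *blackhole_info) {
        ldv_record_dead_fill_slop(c);   /* 1. the old contents die now      */
        c->info = blackhole_info;       /* 2. replace the info pointer      */
        ldv_record_create(c);           /* 3. the black hole is "born" now  */
    }

    int main(void) {
        static const int dummy_blackhole_info = 0;   /* dummy info table */
        Closure c = { 0, 0 };
        blackhole(&c, &dummy_blackhole_info);
        return 0;
    }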
5 ghc/includes/StgProf.h
@@ -1,5 +1,5 @@
/* -----------------------------------------------------------------------------
- * $Id: StgProf.h,v 1.13 2001/10/18 13:46:47 simonmar Exp $
+ * $Id: StgProf.h,v 1.14 2001/11/22 14:25:11 simonmar Exp $
*
* (c) The GHC Team, 1998
*
@@ -349,9 +349,6 @@ extern CostCentreStack *CCS_LIST; /* registered CCS list */
#define ENTER_CCS_PAP_CL(closure) \
ENTER_CCS_PAP((closure)->header.prof.ccs)
- /* temp EW */
-#define STATIC_CCS_REF(ccs) (ccs)
-
/* -----------------------------------------------------------------------------
When not profiling, these macros do nothing...
-------------------------------------------------------------------------- */
75 ghc/includes/StgRetainerProf.h
@@ -0,0 +1,75 @@
+/* -----------------------------------------------------------------------------
+ * $Id: StgRetainerProf.h,v 1.1 2001/11/22 14:25:12 simonmar Exp $
+ *
+ * (c) The GHC Team, 2001
+ *
+ * Retainer profiling
+ * ---------------------------------------------------------------------------*/
+
+#ifndef STGRETAINERPROF_H
+#define STGRETAINERPROF_H
+
+/*
+ Type 'retainer' defines the retainer identity.
+
+ Invariant:
+ 1. The retainer identity of a given retainer cannot change during
+ program execution, no matter where it is actually stored.
+ For instance, the memory address of a retainer cannot be used as
+ its retainer identity because its location may change during garbage
+ collections.
+ 2. Type 'retainer' must come with comparison operations as well as
+ an equality operation. That is, <, >, and == must be supported -
+ this is necessary to store retainers in a sorted order in retainer sets.
+ Therefore, you cannot use a huge structure type as 'retainer', for instance.
+
+ We illustrate three possibilities of defining 'retainer identity'.
+ Choose one of the following three compiler directives:
+
+ Retainer scheme 1 (RETAINER_SCHEME_INFO) : retainer = info table
+ Retainer scheme 2 (RETAINER_SCHEME_CCS) : retainer = cost centre stack
+ Retainer scheme 3 (RETAINER_SCHEME_CC) : retainer = cost centre
+*/
+
+// #define RETAINER_SCHEME_INFO
+#define RETAINER_SCHEME_CCS
+// #define RETAINER_SCHEME_CC
+
+#ifdef RETAINER_SCHEME_INFO
+struct _StgInfoTable;
+typedef struct _StgInfoTable *retainer;
+#endif
+
+#ifdef RETAINER_SCHEME_CCS
+typedef CostCentreStack *retainer;
+#endif
+
+#ifdef RETAINER_SCHEME_CC
+typedef CostCentre *retainer;
+#endif
+
+/*
+ Type 'retainerSet' defines an abstract datatype for sets of retainers.
+
+ Invariants:
+ A retainer set stores its elements in increasing order (in element[] array).
+ */
+
+typedef struct _RetainerSet {
+ nat num; // number of elements
+ nat cost; // cost associated with this retainer set
+ StgWord hashKey; // hash key for this retainer set
+ struct _RetainerSet *link; // link to the next retainer set in the bucket
+ int id; // unique id of this retainer set (used when printing)
+ // Its absolute value is interpreted as its true id; if id is
+ // negative, it indicates that this retainer set has had a positive
+ // cost after some retainer profiling.
+ retainer element[0]; // elements of this retainer set
+ // do not put anything below here!
+} RetainerSet;
+
+//
+// retainerSet - interface: see rts/RetainerSet.h
+//
+
+#endif /* STGRETAINERPROF_H */
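For what it's worth, here is a standalone sketch of how a set with a trailing element[] array like the one above can be allocated and searched. The names mkRetainerSet and elemRetainerSet are illustrative only; the real interface lives in rts/RetainerSet.h, and error handling is omitted.

    #include <stdlib.h>
    #include <string.h>

    typedef unsigned int nat;
    typedef void *retainer;              /* stand-in for whichever scheme is chosen */

    typedef struct _RetainerSet {
        nat num;                         /* number of elements        */
        retainer element[];              /* kept in increasing order  */
    } RetainerSet;

    /* allocate a set holding the num retainers in rs[], assumed sorted */
    static RetainerSet *mkRetainerSet(nat num, const retainer rs[]) {
        RetainerSet *s = malloc(sizeof(RetainerSet) + num * sizeof(retainer));
        s->num = num;
        memcpy(s->element, rs, num * sizeof(retainer));
        return s;
    }

    /* membership test; because elements are sorted, the real code can stop
       early or consult the hash key instead of scanning the whole array */
    static int elemRetainerSet(const RetainerSet *s, retainer r) {
        nat i;
        for (i = 0; i < s->num; i++)
            if (s->element[i] == r) return 1;
        return 0;
    }

    int main(void) {
        static int cells[2];                        /* two dummy retainers      */
        retainer rs[2] = { &cells[0], &cells[1] };  /* already in address order */
        RetainerSet *s = mkRetainerSet(2, rs);
        int found = elemRetainerSet(s, &cells[1]);
        free(s);
        return found ? 0 : 1;
    }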
6 ghc/includes/Updates.h
@@ -1,5 +1,5 @@
/* -----------------------------------------------------------------------------
- * $Id: Updates.h,v 1.25 2001/11/08 12:46:31 simonmar Exp $
+ * $Id: Updates.h,v 1.26 2001/11/22 14:25:12 simonmar Exp $
*
* (c) The GHC Team, 1998-1999
*
@@ -178,7 +178,9 @@ extern void awakenBlockedQueue(StgTSO *q);
------------------------------------------------------------------------- */
#if defined(PROFILING)
-#define PUSH_STD_CCCS(frame) frame->header.prof.ccs = CCCS
+// frame->header.prof.hp.rs = NULL (or frame->header.prof.hp.ldvw = 0) is unnecessary
+// because it is not used anyhow.
+#define PUSH_STD_CCCS(frame) (frame->header.prof.ccs = CCCS)
#else
#define PUSH_STD_CCCS(frame)
#endif
18 ghc/rts/Exception.hc
@@ -1,5 +1,5 @@
/* -----------------------------------------------------------------------------
- * $Id: Exception.hc,v 1.21 2001/08/17 14:44:54 simonmar Exp $
+ * $Id: Exception.hc,v 1.22 2001/11/22 14:25:12 simonmar Exp $
*
* (c) The GHC Team, 1998-2000
*
@@ -260,8 +260,8 @@ CATCH_FRAME_ENTRY_TEMPLATE(stg_catch_frame_5_entry,RET_VEC(Sp[SP_OFF],5));
CATCH_FRAME_ENTRY_TEMPLATE(stg_catch_frame_6_entry,RET_VEC(Sp[SP_OFF],6));
CATCH_FRAME_ENTRY_TEMPLATE(stg_catch_frame_7_entry,RET_VEC(Sp[SP_OFF],7));
-#ifdef PROFILING
-#define CATCH_FRAME_BITMAP 7
+#if defined(PROFILING)
+#define CATCH_FRAME_BITMAP 15
#else
#define CATCH_FRAME_BITMAP 3
#endif
@@ -355,7 +355,7 @@ FN_(raisezh_fast)
* the info was only displayed for an *uncaught* exception.
*/
if (RtsFlags.ProfFlags.showCCSOnException) {
- STGCALL2(print_ccs,stderr,CCCS);
+ STGCALL2(fprintCCS,stderr,CCCS);
}
#endif
@@ -365,8 +365,18 @@ FN_(raisezh_fast)
* is the exception raise. It is used to overwrite all the
* thunks which are currently under evaluation.
*/
+ /*
+ // @LDV profiling
+ // stg_raise_info has THUNK as its closure type. Since a THUNK takes at least
+ // MIN_UPD_SIZE words in its payload, MIN_UPD_SIZE is more appropriate than 1.
+ // It seems that 1 does not cause any problem unless profiling is performed.
+ // However, when LDV profiling goes on, we need to linearly scan small object pool,
+ // where raise_closure is stored, so we should use MIN_UPD_SIZE.
raise_closure = (StgClosure *)RET_STGCALL1(P_,allocate,
sizeofW(StgClosure)+1);
+ */
+ raise_closure = (StgClosure *)RET_STGCALL1(P_,allocate,
+ sizeofW(StgClosure)+MIN_UPD_SIZE);
SET_HDR(raise_closure, &stg_raise_info, CCCS);
raise_closure->payload[0] = R1.cl;
89 ghc/rts/GC.c
@@ -1,5 +1,5 @@
/* -----------------------------------------------------------------------------
- * $Id: GC.c,v 1.126 2001/11/08 12:46:31 simonmar Exp $
+ * $Id: GC.c,v 1.127 2001/11/22 14:25:12 simonmar Exp $
*
* (c) The GHC Team 1998-1999
*
@@ -42,6 +42,9 @@
#include "FrontPanel.h"
#endif
+#include "RetainerProfile.h"
+#include "LdvProfile.h"
+
/* STATIC OBJECT LIST.
*
* During GC:
@@ -602,6 +605,13 @@ GarbageCollect ( void (*get_roots)(evac_fn), rtsBool force_major_gc )
}
}
+#ifdef PROFILING
+ // We call processHeapClosureForDead() on every closure destroyed during
+ // the current garbage collection, so we invoke LdvCensusForDead().
+ if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV)
+ LdvCensusForDead(N);
+#endif
+
// NO MORE EVACUATION AFTER THIS POINT!
// Finally: compaction of the oldest generation.
if (major_gc && oldest_gen->steps[0].is_compacted) {
@@ -933,6 +943,12 @@ GarbageCollect ( void (*get_roots)(evac_fn), rtsBool force_major_gc )
if (major_gc) { gcCAFs(); }
#endif
+#ifdef PROFILING
+ // resetStaticObjectForRetainerProfiling() must be called before
+ // zeroing below.
+ resetStaticObjectForRetainerProfiling();
+#endif
+
// zero the scavenged static object list
if (major_gc) {
zero_static_object_list(scavenged_static_objects);
@@ -963,7 +979,6 @@ GarbageCollect ( void (*get_roots)(evac_fn), rtsBool force_major_gc )
// restore enclosing cost centre
#ifdef PROFILING
- heapCensus();
CCCS = prev_CCS;
#endif
@@ -1271,6 +1286,10 @@ static __inline__ StgClosure *
copy(StgClosure *src, nat size, step *stp)
{
P_ to, from, dest;
+#ifdef PROFILING
+ // @LDV profiling
+ nat size_org = size;
+#endif
TICK_GC_WORDS_COPIED(size);
/* Find out where we're going, using the handy "to" pointer in
@@ -1300,6 +1319,12 @@ copy(StgClosure *src, nat size, step *stp)
dest = stp->hp;
stp->hp = to;
upd_evacuee(src,(StgClosure *)dest);
+#ifdef PROFILING
+ // @LDV profiling
+ // We store the size of the just evacuated object in the LDV word so that
+ // the profiler can guess the position of the next object later.
+ SET_EVACUAEE_FOR_LDV(src, size_org);
+#endif
return (StgClosure *)dest;
}
@@ -1309,10 +1334,14 @@ copy(StgClosure *src, nat size, step *stp)
*/
-static __inline__ StgClosure *
+static StgClosure *
copyPart(StgClosure *src, nat size_to_reserve, nat size_to_copy, step *stp)
{
P_ dest, to, from;
+#ifdef PROFILING
+ // @LDV profiling
+ nat size_to_copy_org = size_to_copy;
+#endif
TICK_GC_WORDS_COPIED(size_to_copy);
if (stp->gen_no < evac_gen) {
@@ -1334,6 +1363,17 @@ copyPart(StgClosure *src, nat size_to_reserve, nat size_to_copy, step *stp)
dest = stp->hp;
stp->hp += size_to_reserve;
upd_evacuee(src,(StgClosure *)dest);
+#ifdef PROFILING
+ // @LDV profiling
+ // We store the size of the just evacuated object in the LDV word so that
+ // the profiler can guess the position of the next object later.
+ // size_to_copy_org is wrong because the closure already occupies size_to_reserve
+ // words.
+ SET_EVACUAEE_FOR_LDV(src, size_to_reserve);
+ // fill the slop
+ if (size_to_reserve - size_to_copy_org > 0)
+ FILL_SLOP(stp->hp - 1, (int)(size_to_reserve - size_to_copy_org));
+#endif
return (StgClosure *)dest;
}
@@ -2162,9 +2202,23 @@ scavenge(step *stp)
}
case IND_PERM:
- if (stp->gen_no != 0) {
- SET_INFO(((StgClosure *)p), &stg_IND_OLDGEN_PERM_info);
- }
+ if (stp->gen->no != 0) {
+#ifdef PROFILING
+ // @LDV profiling
+ // No need to call LDV_recordDead_FILL_SLOP_DYNAMIC() because an
+ // IND_OLDGEN_PERM closure is larger than an IND_PERM closure.
+ LDV_recordDead((StgClosure *)p, sizeofW(StgInd));
+#endif
+ //
+ // Todo: maybe use SET_HDR() and remove LDV_recordCreate()?
+ //
+ SET_INFO(((StgClosure *)p), &stg_IND_OLDGEN_PERM_info);
+#ifdef PROFILING
+ // @LDV profiling
+ // We pretend that p has just been created.
+ LDV_recordCreate((StgClosure *)p);
+#endif
+ }
// fall through
case IND_OLDGEN_PERM:
((StgIndOldGen *)p)->indirectee =
@@ -3590,7 +3644,17 @@ threadLazyBlackHole(StgTSO *tso)
#if (!defined(LAZY_BLACKHOLING)) && defined(DEBUG)
belch("Unexpected lazy BHing required at 0x%04x",(int)bh);
#endif
+#ifdef PROFILING
+ // @LDV profiling
+ // We pretend that bh is now dead.
+ LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)bh);
+#endif
SET_INFO(bh,&stg_BLACKHOLE_info);
+#ifdef PROFILING
+ // @LDV profiling
+ // We pretend that bh has just been created.
+ LDV_recordCreate(bh);
+#endif
}
update_frame = update_frame->link;
@@ -3832,7 +3896,20 @@ threadSqueezeStack(StgTSO *tso)
}
}
#endif
+#ifdef PROFILING
+ // @LDV profiling
+ // We pretend that bh is now dead.
+ LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)bh);
+#endif
+ //
+ // Todo: maybe use SET_HDR() and remove LDV_recordCreate()?
+ //
SET_INFO(bh,&stg_BLACKHOLE_info);
+#ifdef PROFILING
+ // @LDV profiling
+ // We pretend that bh has just been created.
+ LDV_recordCreate(bh);
+#endif
}
}
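The reason copy()/copyPart() record the original size of an evacuated closure in the LDV word and zero-fill unused slop is so that the census code (processHeap() and friends in LdvProfile.c below) can walk a block linearly: each closure reports its own size, and any zero words left behind are skipped until the next closure starts. A toy model of that walk, with an invented block layout where a closure's first word is taken to be its size:

    #include <stdio.h>

    typedef unsigned long W;

    /* toy closure: its first word is its size in words (never 0) */
    static W closure_size(const W *p) { return *p; }

    static void scan_block(const W *start, const W *free_ptr) {
        const W *p = start;
        while (p < free_ptr) {
            W size = closure_size(p);
            printf("closure of %lu words at offset %ld\n",
                   size, (long)(p - start));
            p += size;
            while (p < free_ptr && *p == 0)   /* skip zero-filled slop */
                p++;
        }
    }

    int main(void) {
        /* a 3-word closure, 2 words of slop, then a 2-word closure */
        W block[] = { 3, 11, 12, 0, 0, 2, 21 };
        scan_block(block, block + 7);
        return 0;
    }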
3  ghc/rts/HeapStackCheck.hc
@@ -1,5 +1,5 @@
/* -----------------------------------------------------------------------------
- * $Id: HeapStackCheck.hc,v 1.18 2001/11/08 12:46:31 simonmar Exp $
+ * $Id: HeapStackCheck.hc,v 1.19 2001/11/22 14:25:12 simonmar Exp $
*
* (c) The GHC Team, 1998-1999
*
@@ -47,7 +47,6 @@
* ThreadRunGHC thread.
*/
-
#define GC_GENERIC \
if (Hp > HpLim) { \
Hp -= HpAlloc; \
10 ghc/rts/Itimer.c
@@ -1,5 +1,5 @@
/* -----------------------------------------------------------------------------
- * $Id: Itimer.c,v 1.25 2001/11/21 20:55:10 sof Exp $
+ * $Id: Itimer.c,v 1.26 2001/11/22 14:25:12 simonmar Exp $
*
* (c) The GHC Team, 1995-1999
*
@@ -142,6 +142,8 @@ initialize_virtual_timer(nat ms)
}
}
+ initProfTimer();
+
return 0;
}
@@ -158,6 +160,10 @@ initialize_virtual_timer(nat ms)
timestamp = getourtimeofday();
+#ifdef PROFILING
+ initProfTimer();
+#endif
+
it.it_value.tv_sec = ms / 1000;
it.it_value.tv_usec = 1000 * (ms - (1000 * it.it_value.tv_sec));
it.it_interval = it.it_value;
@@ -178,6 +184,8 @@ initialize_virtual_timer(nat ms)
timestamp = getourtimeofday();
+ initProfTimer();
+
se.sigev_notify = SIGEV_SIGNAL;
se.sigev_signo = SIGVTALRM;
se.sigev_value.sival_int = SIGVTALRM;
9 ghc/rts/Itimer.h
@@ -1,7 +1,7 @@
/* -----------------------------------------------------------------------------
- * $Id: Itimer.h,v 1.8 2001/11/21 20:55:10 sof Exp $
+ * $Id: Itimer.h,v 1.9 2001/11/22 14:25:12 simonmar Exp $
*
- * (c) The GHC Team 1998-1999
+ * (c) The GHC Team 1998-2001
*
* Interval timer for profiling and pre-emptive scheduling.
*
@@ -15,11 +15,6 @@
*/
#define CS_MIN_MILLISECS TICK_MILLISECS /* milliseconds per slice */
-extern rtsBool do_prof_ticks; /* profiling ticks on/off */
-
-/* Total number of ticks since startup */
-extern lnat total_ticks;
-
int initialize_virtual_timer ( nat ms );
int install_vtalrm_handler ( void );
void block_vtalrm_signal ( void );
857 ghc/rts/LdvProfile.c
@@ -0,0 +1,857 @@
+/* -----------------------------------------------------------------------------
+ * $Id: LdvProfile.c,v 1.1 2001/11/22 14:25:12 simonmar Exp $
+ *
+ * (c) The GHC Team, 2001
+ * Author: Sungwoo Park
+ *
+ * Lag/Drag/Void profiling.
+ *
+ * ---------------------------------------------------------------------------*/
+
+#ifdef PROFILING
+
+#include "Stg.h"
+#include "Rts.h"
+#include "LdvProfile.h"
+#include "RtsFlags.h"
+#include "Itimer.h"
+#include "Proftimer.h"
+#include "Profiling.h"
+#include "Stats.h"
+#include "Storage.h"
+#include "RtsUtils.h"
+#include "Schedule.h"
+
+/*
+ ldvTime stores the current LDV time, that is, the current era. It
+ is one larger than the number of times LDV profiling has been
+ performed, i.e.,
+ ldvTime - 1 == the number of times LDV profiling was executed
+ == the number of censuses made so far.
+ RESTRICTION:
+ ldvTime must be no longer than LDV_SHIFT (15 or 30) bits.
+ Invariants:
+ LDV profiling is turned off if ldvTime is 0.
+ LDV profiling is turned on if ldvTime is > 0.
+ ldvTime is initialized to 1 in initLdvProfiling().
+ If LDV profiling is not performed, ldvTime must remain 0 (e.g., when we
+ are doing retainer profiling).
+ ldvTime is set to 1 in initLdvProfiling().
+ ldvTime is set back to 0 in shutdownHaskell().
+ In between, ldvTime is incremented at each census.
+*/
+nat ldvTime = 0;
+#
+// ldvTimeSave is set in LdvCensusKillAll(), and stores the final number of
+// times that LDV profiling was performed.
+static nat ldvTimeSave;
+
+// gi[] stores the statistics obtained at each heap census.
+// gi[0] is not used. See initLdvProfiling().
+LdvGenInfo *gi;
+
+#define giINCREMENT 32 // allocation unit for gi[]
+static nat giLength; // current length of gi[]
+
+// giMax is initialized to 2^LDV_SHIFT in initLdvProfiling().
+// When ldvTime reaches giMax, the profiling stops because a closure can
+// store only up to (giMax - 1) as its creation or last use time.
+static nat giMax;
+
+/* --------------------------------------------------------------------------
+ * Fills in the slop when a *dynamic* closure changes its type.
+ * First calls LDV_recordDead() to declare the closure is dead, and then
+ * fills in the slop.
+ *
+ * Invoked when:
+ * 1) blackholing, UPD_BH_UPDATABLE() and UPD_BH_SINGLE_ENTRY (in
+ * includes/StgMacros.h), threadLazyBlackHole() and
+ * threadSqueezeStack() (in GC.c).
+ * 2) updating with indirection closures, updateWithIndirection()
+ * and updateWithPermIndirection() (in Storage.h).
+ *
+ * LDV_recordDead_FILL_SLOP_DYNAMIC() is not called on 'inherently used'
+ * closures such as TSO. It is not called on PAP because PAP is not updatable.
+ * ----------------------------------------------------------------------- */
+void
+LDV_recordDead_FILL_SLOP_DYNAMIC( StgClosure *p )
+{
+ if (ldvTime > 0) {
+ StgInfoTable *inf = get_itbl((p));
+ nat nw, i;
+ switch (inf->type) {
+ case THUNK_1_0:
+ case THUNK_0_1:
+ case THUNK_2_0:
+ case THUNK_1_1:
+ case THUNK_0_2:
+ case THUNK_SELECTOR:
+ nw = MIN_UPD_SIZE;
+ break;
+ case THUNK:
+ nw = inf->layout.payload.ptrs + inf->layout.payload.nptrs;
+ if (nw < MIN_UPD_SIZE)
+ nw = MIN_UPD_SIZE;
+ break;
+ case AP_UPD:
+ nw = sizeofW(StgPAP) - sizeofW(StgHeader) + ((StgPAP *)p)->n_args;
+ break;
+ case CAF_BLACKHOLE:
+ case BLACKHOLE:
+ case SE_BLACKHOLE:
+ case SE_CAF_BLACKHOLE:
+ nw = inf->layout.payload.ptrs + inf->layout.payload.nptrs;
+ break;
+ default:
+ barf("Unexpected closure type %u in LDV_recordDead_FILL_SLOP_DYNAMIC()", inf->type);
+ break;
+ }
+ LDV_recordDead((StgClosure *)(p), nw + sizeofW(StgHeader));
+ for (i = 0; i < nw; i++) {
+ ((StgClosure *)(p))->payload[i] = 0;
+ }
+ }
+}
+
+/* --------------------------------------------------------------------------
+ * Initialize gi[ldvTime].
+ * ----------------------------------------------------------------------- */
+static inline void
+giInitForCurrentEra(void)
+{
+ gi[ldvTime].notUsed = 0;
+ gi[ldvTime].inherentlyUsed = 0;
+ gi[ldvTime].used = 0;
+
+ gi[ldvTime].voidNew = 0;
+ gi[ldvTime].dragNew = 0;
+}
+
+/* --------------------------------------------------------------------------
+ * Increases ldvTime by 1 and initialize gi[ldvTime].
+ * Reallocates gi[] and increases its size if needed.
+ * ----------------------------------------------------------------------- */
+static void
+incrementLdvTime( void )
+{
+ ldvTime++;
+
+ if (ldvTime == giMax) {
+ fprintf(stderr,
+ "Lag/Drag/Void profiling limit %u reached. "
+ "Please increase the profiling interval with -L option.\n",
+ giLength);
+ barf("Current profiling interval = %f seconds",
+ (float)RtsFlags.ProfFlags.profileInterval / 1000.0 );
+ }
+
+ if (ldvTime % giINCREMENT == 0) {
+ gi = stgReallocBytes(gi, sizeof(LdvGenInfo) * (giLength + giINCREMENT),
+ "incrementLdvTime");
+ giLength += giINCREMENT;
+ }
+
+ // What a stupid bug I struggled against for such a long time! I
+ // placed giInitForCurrentEra() before the above rellocation part,
+ // and it cost me three hours!
+ giInitForCurrentEra();
+}
+
+/* --------------------------------------------------------------------------
+ * Initialization code for LDV profiling.
+ * ----------------------------------------------------------------------- */
+void
+initLdvProfiling( void )
+{
+ nat p;
+
+ gi = stgMallocBytes(sizeof(LdvGenInfo) * giINCREMENT, "initLdvProfiling");
+ giLength = giINCREMENT;
+
+ ldvTime = 1; // turn on LDV profiling.
+ giInitForCurrentEra();
+
+ // giMax = 2^LDV_SHIFT
+ giMax = 1;
+ for (p = 0; p < LDV_SHIFT; p++)
+ giMax *= 2;
+}
+
+/* --------------------------------------------------------------------------
+ * This function must be called before f-closing prof_file.
+ * hp_file is still open at this point; see endHeapProfiling() in ProfHeap.c.
+ * ----------------------------------------------------------------------- */
+void
+endLdvProfiling( void )
+{
+ nat t;
+ int sumVoidNew, sumDragNew;
+
+ // Now we compute voidTotal and dragTotal of each LdvGenInfo structure.
+ sumVoidNew = 0;
+ sumDragNew = 0;
+ for (t = 0; t < ldvTimeSave; t++) {
+ sumVoidNew += gi[t].voidNew;
+ sumDragNew += gi[t].dragNew;
+ gi[t].voidTotal = sumVoidNew;
+ gi[t].dragTotal = sumDragNew;
+ }
+
+ // t = 0 is wrong (because ldvTime == 0 indicates LDV profiling is
+ // turned off).
+ for (t = 1;t < ldvTimeSave; t++) {
+ fprintf(hp_file, "MARK %f\n", gi[t].time);
+ fprintf(hp_file, "BEGIN_SAMPLE %f\n", gi[t].time);
+ fprintf(hp_file, "VOID\t%u\n", gi[t].voidTotal * sizeof(StgWord));
+ fprintf(hp_file, "LAG\t%u\n", (gi[t].notUsed - gi[t].voidTotal) * sizeof(StgWord));
+ fprintf(hp_file, "USE\t%u\n", (gi[t].used - gi[t].dragTotal) * sizeof(StgWord));
+ fprintf(hp_file, "INHERENT_USE\t%u\n", gi[t].inherentlyUsed * sizeof(StgWord));
+ fprintf(hp_file, "DRAG\t%u\n", gi[t].dragTotal * sizeof(StgWord));
+ fprintf(hp_file, "END_SAMPLE %f\n", gi[t].time);
+ }
+}
+
+/* --------------------------------------------------------------------------
+ * Print the statistics.
+ * This function is called after each retainer profiling.
+ * ----------------------------------------------------------------------- */
+static void
+outputLdvSet( void )
+{
+}
+
+/* --------------------------------------------------------------------------
+ * This function is eventually called on every object in the heap
+ * during a census. Any census is initiated immediately after a major
+ * garbage collection, and we exploit this fact in the implementation.
+ * If c is an 'inherently used' closure, gi[ldvTime].inherentlyUsed is
+ * updated. If c is an ordinary closure, either gi[ldvTime].notUsed or
+ * gi[ldvTime].used is updated.
+ * ----------------------------------------------------------------------- */
+static inline nat
+processHeapClosure(StgClosure *c)
+{
+ nat size;
+ StgInfoTable *info;
+
+ info = get_itbl(c);
+
+ ASSERT(
+ ((LDVW(c) & LDV_CREATE_MASK) >> LDV_SHIFT) <= ldvTime &&
+ ((LDVW(c) & LDV_CREATE_MASK) >> LDV_SHIFT) > 0
+ );
+ ASSERT(
+ ((LDVW(c) & LDV_STATE_MASK) == LDV_STATE_CREATE) ||
+ (
+ (LDVW(c) & LDV_LAST_MASK) <= ldvTime &&
+ (LDVW(c) & LDV_LAST_MASK) > 0
+ )
+ );
+
+ switch (info->type) {
+ /*
+ 'inherently used' cases: add to gi[ldvTime].inherentlyUsed
+ */
+
+ case TSO:
+ size = tso_sizeW((StgTSO *)c);
+ goto inherently_used;
+
+ case MVAR:
+ size = sizeofW(StgMVar);
+ goto inherently_used;
+
+ case MUT_ARR_PTRS:
+ case MUT_ARR_PTRS_FROZEN:
+ size = mut_arr_ptrs_sizeW((StgMutArrPtrs *)c);
+ goto inherently_used;
+
+ case ARR_WORDS:
+ size = arr_words_sizeW((StgArrWords *)c);
+ goto inherently_used;
+
+ case WEAK:
+ case MUT_VAR:
+ case MUT_CONS:
+ case FOREIGN:
+ case BCO:
+ case STABLE_NAME:
+ size = sizeW_fromITBL(info);
+ goto inherently_used;
+
+ /*
+ ordinary cases: add to gi[ldvTime].notUsed if c is not being used.
+ add to gi[ldvTime].used if c is being used.
+ */
+ case THUNK:
+ size = stg_max(sizeW_fromITBL(info), sizeofW(StgHeader) + MIN_UPD_SIZE);
+ break;
+
+ case THUNK_1_0:
+ case THUNK_0_1:
+ case THUNK_2_0:
+ case THUNK_1_1:
+ case THUNK_0_2:
+ case THUNK_SELECTOR:
+ size = sizeofW(StgHeader) + MIN_UPD_SIZE;
+ break;
+
+ case AP_UPD:
+ case PAP:
+ size = pap_sizeW((StgPAP *)c);
+ break;
+
+ case CONSTR:
+ case CONSTR_1_0:
+ case CONSTR_0_1:
+ case CONSTR_2_0:
+ case CONSTR_1_1:
+ case CONSTR_0_2:
+
+ case FUN:
+ case FUN_1_0:
+ case FUN_0_1:
+ case FUN_2_0:
+ case FUN_1_1:
+ case FUN_0_2:
+
+ case BLACKHOLE_BQ:
+ case BLACKHOLE:
+ case SE_BLACKHOLE:
+ case CAF_BLACKHOLE:
+ case SE_CAF_BLACKHOLE:
+ size = sizeW_fromITBL(info);
+ break;
+
+ case IND_PERM:
+ size = sizeofW(StgInd);
+ break;
+
+ case IND_OLDGEN_PERM:
+ size = sizeofW(StgIndOldGen);
+ break;
+
+ /*
+ Error case
+ */
+ case IND: // IND cannot appear after major GCs.
+ case IND_OLDGEN: // IND_OLDGEN cannot appear after major GCs.
+ case EVACUATED: // EVACUATED is encountered only during GCs.
+ // static objects
+ case IND_STATIC:
+ case CONSTR_STATIC:
+ case FUN_STATIC:
+ case THUNK_STATIC:
+ case CONSTR_INTLIKE:
+ case CONSTR_CHARLIKE:
+ case CONSTR_NOCAF_STATIC:
+ // stack objects
+ case UPDATE_FRAME:
+ case CATCH_FRAME:
+ case STOP_FRAME:
+ case SEQ_FRAME:
+ case RET_DYN:
+ case RET_BCO:
+ case RET_SMALL:
+ case RET_VEC_SMALL:
+ case RET_BIG:
+ case RET_VEC_BIG:
+ // others
+ case BLOCKED_FETCH:
+ case FETCH_ME:
+ case FETCH_ME_BQ:
+ case RBH:
+ case REMOTE_REF:
+ case INVALID_OBJECT:
+ default:
+ barf("Invalid object in processHeapClosure(): %d", info->type);
+ return 0;
+ }
+
+ /*
+ ordinary cases:
+ We can compute either gi[ldvTime].notUsed or gi[ldvTime].used; the other
+ can be computed from the total sum of costs.
+ At the moment, we choose to compute gi[ldvTime].notUsed, which seems to
+ be smaller than gi[ldvTime].used.
+ */
+
+ // ignore closures that don't satisfy our constraints.
+ if (closureSatisfiesConstraints(c)) {
+ if ((LDVW(c) & LDV_STATE_MASK) == LDV_STATE_CREATE)
+ gi[ldvTime].notUsed += size - sizeofW(StgProfHeader);
+ else
+ gi[ldvTime].used += size - sizeofW(StgProfHeader);
+ }
+ return size;
+
+inherently_used:
+ // ignore closures that don't satisfy our constraints.
+ if (closureSatisfiesConstraints(c)) {
+ gi[ldvTime].inherentlyUsed += size - sizeofW(StgProfHeader);
+ }
+ return size;
+}
+
+/* --------------------------------------------------------------------------
+ * Calls processHeapClosure() on every closure in the heap blocks
+ * beginning at bd during a census.
+ * ----------------------------------------------------------------------- */
+static void
+processHeap( bdescr *bd )
+{
+ StgPtr p;
+ nat size;
+
+ while (bd != NULL) {
+ p = bd->start;
+ while (p < bd->free) {
+ size = processHeapClosure((StgClosure *)p);
+ p += size;
+ while (p < bd->free && !*p) // skip slop
+ p++;
+ }
+ ASSERT(p == bd->free);
+ bd = bd->link;
+ }
+}
+
+/* --------------------------------------------------------------------------
+ * Calls processHeapClosure() on every closure in the small object pool
+ * during a census.
+ * ----------------------------------------------------------------------- */
+static void
+processSmallObjectPool( void )
+{
+ bdescr *bd;
+ StgPtr p;
+ nat size;
+
+ bd = small_alloc_list;
+
+ // first block
+ if (bd == NULL)
+ return;
+
+ p = bd->start;
+ while (p < alloc_Hp) {
+ size = processHeapClosure((StgClosure *)p);
+ p += size;
+ while (p < alloc_Hp && !*p) // skip slop
+ p++;
+ }
+ ASSERT(p == alloc_Hp);
+
+ bd = bd->link;
+ while (bd != NULL) {
+ p = bd->start;
+ while (p < bd->free) {
+ size = processHeapClosure((StgClosure *)p);
+ p += size;
+ while (p < bd->free && !*p) // skip slop
+ p++;
+ }
+ ASSERT(p == bd->free);
+ bd = bd->link;
+ }
+}
+
+/* --------------------------------------------------------------------------
+ * Calls processHeapClosure() on every (large) closure in the object
+ * chain beginning at bd during a census.
+ * ----------------------------------------------------------------------- */
+static void
+processChain( bdescr *bd )
+{
+ while (bd != NULL) {
+ // bd->free - bd->start is not an accurate measurement of the
+ // object size. Actually it is always zero, so we compute its
+ // size explicitly.
+ processHeapClosure((StgClosure *)bd->start);
+ bd = bd->link;
+ }
+}
+
+/* --------------------------------------------------------------------------
+ * Starts a census for LDV profiling.
+ * Invariants:
+ * Any call to LdvCensus() is preceded by a major garbage collection.
+ * ----------------------------------------------------------------------- */
+void
+LdvCensus( void )
+{
+ nat g, s;
+
+ // ldvTime == 0 means that LDV profiling is currently turned off.
+ if (ldvTime == 0)
+ return;
+
+ stat_startLDV();
+ //
+ // Todo: when we perform LDV profiling, the Haskell mutator time seems to
+ // be affected by -S or -s runtime option. For instance, the
+ // following two options should result in nearly same
+ // profiling outputs, but the second run (without -Sstderr
+ // option) spends almost twice as long in the Haskell
+ // mutator as the first run:
+ //
+ // 1) +RTS -Sstderr -hL -RTS
+ // 2) +RTS -hL -RTS
+ //
+ // This is quite a subtle bug because this weird phenomenon is not
+ // observed in retainer profiling, yet mut_user_time_during_LDV() is
+ // completely orthogonal to mut_user_time_during_RP(). However, the
+ // overall shapes of the resultant graphs are almost the same.
+ //
+ gi[ldvTime].time = mut_user_time_during_LDV();
+ if (RtsFlags.GcFlags.generations == 1) {
+ //
+ // Todo: support LDV for two-space garbage collection.
+ //
+ barf("Lag/Drag/Void profiling not supported with -G1");
+ } else {
+ for (g = 0; g < RtsFlags.GcFlags.generations; g++)
+ for (s = 0; s < generations[g].n_steps; s++) {
+ if (g == 0 && s == 0) {
+ // after a major GC, the nursery must be empty,
+ // so there is no need to call processNursery().
+ ASSERT(MainCapability.r.rNursery->start ==
+ MainCapability.r.rNursery->free);
+ processSmallObjectPool();
+ processChain(generations[g].steps[s].large_objects);
+ } else{
+ processHeap(generations[g].steps[s].blocks);
+ processChain(generations[g].steps[s].large_objects);
+ }
+ }
+ }
+ outputLdvSet(); // output to hp_file
+ stat_endLDV(); // output to prof_file
+
+ incrementLdvTime();
+}
+
+/* --------------------------------------------------------------------------
+ * This function is called eventually on every object destroyed during
+ * a garbage collection, whether it is a major garbage collection or
+ * not. If c is an 'inherently used' closure, nothing happens. If c
+ * is an ordinary closure, LDV_recordDead() is called on c with its
+ * proper size which excludes the profiling header portion in the
+ * closure. Returns the size of the closure, including the profiling
+ * header portion, so that the caller can find the next closure.
+ * ----------------------------------------------------------------------- */
+static inline nat
+processHeapClosureForDead( StgClosure *c )
+{
+ nat size;
+ StgInfoTable *info;
+
+ info = get_itbl(c);
+
+ if (info->type != EVACUATED) {
+ ASSERT(((LDVW(c) & LDV_CREATE_MASK) >> LDV_SHIFT) <= ldvTime &&
+ ((LDVW(c) & LDV_CREATE_MASK) >> LDV_SHIFT) > 0);
+ ASSERT(((LDVW(c) & LDV_STATE_MASK) == LDV_STATE_CREATE) ||
+ (
+ (LDVW(c) & LDV_LAST_MASK) <= ldvTime &&
+ (LDVW(c) & LDV_LAST_MASK) > 0
+ ));
+ }
+
+ switch (info->type) {
+ /*
+ 'inherently used' cases: do nothing.
+ */
+
+ case TSO:
+ size = tso_sizeW((StgTSO *)c);
+ return size;
+
+ case MVAR:
+ size = sizeofW(StgMVar);
+ return size;
+
+ case MUT_ARR_PTRS:
+ case MUT_ARR_PTRS_FROZEN:
+ size = mut_arr_ptrs_sizeW((StgMutArrPtrs *)c);
+ return size;
+
+ case ARR_WORDS:
+ size = arr_words_sizeW((StgArrWords *)c);
+ return size;
+
+ case WEAK:
+ case MUT_VAR:
+ case MUT_CONS:
+ case FOREIGN:
+ case BCO:
+ case STABLE_NAME:
+ size = sizeW_fromITBL(info);
+ return size;
+
+ /*
+ ordinary cases: call LDV_recordDead().
+ */
+
+ case THUNK:
+ size = stg_max(sizeW_fromITBL(info), sizeofW(StgHeader) + MIN_UPD_SIZE);
+ break;
+
+ case THUNK_1_0:
+ case THUNK_0_1:
+ case THUNK_2_0:
+ case THUNK_1_1:
+ case THUNK_0_2:
+ case THUNK_SELECTOR:
+ size = sizeofW(StgHeader) + MIN_UPD_SIZE;
+ break;
+
+ case AP_UPD:
+ case PAP:
+ size = pap_sizeW((StgPAP *)c);
+ break;
+
+ case CONSTR:
+ case CONSTR_1_0:
+ case CONSTR_0_1:
+ case CONSTR_2_0:
+ case CONSTR_1_1:
+ case CONSTR_0_2:
+
+ case FUN:
+ case FUN_1_0:
+ case FUN_0_1:
+ case FUN_2_0:
+ case FUN_1_1:
+ case FUN_0_2:
+
+ case BLACKHOLE_BQ:
+ case BLACKHOLE:
+ case SE_BLACKHOLE:
+ case CAF_BLACKHOLE:
+ case SE_CAF_BLACKHOLE:
+ size = sizeW_fromITBL(info);
+ break;
+
+ case IND_PERM:
+ size = sizeofW(StgInd);
+ break;
+
+ case IND_OLDGEN_PERM:
+ size = sizeofW(StgIndOldGen);
+ break;
+
+ /*
+ 'Ignore' cases
+ */
+ // Why can we ignore IND/IND_OLDGEN closures? We assume that
+ // any census is preceded by a major garbage collection, which
+ // IND/IND_OLDGEN closures cannot survive. Therefore, there is no
+ // point in counting IND/IND_OLDGEN closures here, because they
+ // will perish before the next census in any case.
+ case IND:
+ size = sizeofW(StgInd);
+ return size;
+
+ case IND_OLDGEN:
+ size = sizeofW(StgIndOldGen);
+ return size;
+
+ case EVACUATED:
+ // The size of the evacuated closure is currently stored in
+ // the LDV field. See SET_EVACUAEE_FOR_LDV() in
+ // includes/StgLdvProf.h.
+ return LDVW(c);
+
+ /*
+ Error case
+ */
+ // static objects
+ case IND_STATIC:
+ case CONSTR_STATIC:
+ case FUN_STATIC:
+ case THUNK_STATIC:
+ case CONSTR_INTLIKE:
+ case CONSTR_CHARLIKE:
+ case CONSTR_NOCAF_STATIC:
+ // stack objects
+ case UPDATE_FRAME:
+ case CATCH_FRAME:
+ case STOP_FRAME:
+ case SEQ_FRAME:
+ case RET_DYN:
+ case RET_BCO:
+ case RET_SMALL:
+ case RET_VEC_SMALL:
+ case RET_BIG:
+ case RET_VEC_BIG:
+ // others
+ case BLOCKED_FETCH:
+ case FETCH_ME:
+ case FETCH_ME_BQ:
+ case RBH:
+ case REMOTE_REF:
+ case INVALID_OBJECT:
+ default:
+ barf("Invalid object in processHeapClosureForDead(): %d", info->type);
+ return 0;
+ }
+
+ // Found a dead closure: record its size
+ LDV_recordDead(c, size);
+ return size;
+}
+
+/* --------------------------------------------------------------------------
+ * Calls processHeapClosureForDead() on every *dead* closures in the
+ * heap blocks starting at bd.
+ * ----------------------------------------------------------------------- */
+static void
+processHeapForDead( bdescr *bd )
+{
+ StgPtr p;
+
+ while (bd != NULL) {
+ p = bd->start;
+ while (p < bd->free) {
+ p += processHeapClosureForDead((StgClosure *)p);
+ while (p < bd->free && !*p) // skip slop
+ p++;
+ }
+ ASSERT(p == bd->free);
+ bd = bd->link;
+ }
+}
+
+/* --------------------------------------------------------------------------
+ * Calls processHeapClosureForDead() on every *dead* closures in the nursery.
+ * ----------------------------------------------------------------------- */
+static void
+processNurseryForDead( void )
+{
+ StgPtr p, bdLimit;
+ bdescr *bd;
+
+ bd = MainCapability.r.rNursery;
+ while (bd->start < bd->free) {
+ p = bd->start;
+ bdLimit = bd->start + BLOCK_SIZE_W;
+ while (p < bd->free && p < bdLimit) {
+ p += processHeapClosureForDead((StgClosure *)p);
+ while (p < bd->free && p < bdLimit && !*p) // skip slop
+ p++;
+ }
+ bd = bd->link;
+ if (bd == NULL)
+ break;
+ }
+}
+
+/* --------------------------------------------------------------------------
+ * Calls processHeapClosureForDead() on every *dead* closures in the
+ * small object pool.
+ * ----------------------------------------------------------------------- */
+static void
+processSmallObjectPoolForDead( void )
+{
+ bdescr *bd;
+ StgPtr p;
+
+ bd = small_alloc_list;
+
+ // first block
+ if (bd == NULL)
+ return;
+
+ p = bd->start;
+ while (p < alloc_Hp) {
+ p += processHeapClosureForDead((StgClosure *)p);
+ while (p < alloc_Hp && !*p) // skip slop
+ p++;
+ }
+ ASSERT(p == alloc_Hp);
+
+ bd = bd->link;
+ while (bd != NULL) {
+ p = bd->start;
+ while (p < bd->free) {
+ p += processHeapClosureForDead((StgClosure *)p);
+ while (p < bd->free && !*p) // skip slop
+ p++;
+ }
+ ASSERT(p == bd->free);
+ bd = bd->link;
+ }
+}
+
+/* --------------------------------------------------------------------------
+ * Calls processHeapClosureForDead() on every *dead* closures in the closure
+ * chain.
+ * ----------------------------------------------------------------------- */
+static void
+processChainForDead( bdescr *bd )
+{
+ // Any object still in the chain is dead!
+ while (bd != NULL) {
+ processHeapClosureForDead((StgClosure *)bd->start);
+ bd = bd->link;
+ }
+}
+
+/* --------------------------------------------------------------------------
+ * Start a census for *dead* closures, and calls
+ * processHeapClosureForDead() on every closure which died in the
+ * current garbage collection. This function is called from a garbage
+ * collector right before tidying up, when all dead closures are still
+ * stored in the heap and easy to identify. Generations 0 through N
+ * have just been garbage collected.
+ * ----------------------------------------------------------------------- */
+void
+LdvCensusForDead( nat N )
+{
+ nat g, s;
+
+ // ldvTime == 0 means that LDV profiling is currently turned off.
+ if (ldvTime == 0)
+ return;
+
+ if (RtsFlags.GcFlags.generations == 1) {
+ //
+ // Todo: support LDV for two-space garbage collection.
+ //
+ barf("Lag/Drag/Void profiling not supported with -G1");
+ } else {
+ for (g = 0; g <= N; g++)
+ for (s = 0; s < generations[g].n_steps; s++) {
+ if (g == 0 && s == 0) {
+ processSmallObjectPoolForDead();
+ processNurseryForDead();
+ processChainForDead(generations[g].steps[s].large_objects);
+ } else{
+ processHeapForDead(generations[g].steps[s].blocks);
+ processChainForDead(generations[g].steps[s].large_objects);
+ }
+ }
+ }
+}
+
+/* --------------------------------------------------------------------------
+ * Regard any closure in the current heap as dead or moribund and update
+ * LDV statistics accordingly.
+ * Called from shutdownHaskell() in RtsStartup.c.
+ * Also, stops LDV profiling by resetting ldvTime to 0.
+ * ----------------------------------------------------------------------- */
+void
+LdvCensusKillAll( void )
+{
+ LdvCensusForDead(RtsFlags.GcFlags.generations - 1);
+
+ // record the time when LDV profiling stops.
+ ldvTimeSave = ldvTime;
+
+ // and stop LDV profiling.
+ ldvTime = 0;
+}
+
+#endif /* PROFILING */
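The voidNew/dragNew bookkeeping described in StgLdvProf.h and consumed by endLdvProfiling() above relies on a simple trick: instead of updating every era a dead closure spans, LDV_recordDead() adjusts only the two end eras, and the per-era totals are rebuilt afterwards with a running (prefix) sum. A tiny standalone illustration, with made-up era numbers and sizes:

    #include <stdio.h>

    #define N_ERAS 6

    int main(void) {
        int voidNew[N_ERAS] = { 0 };
        int voidTotal[N_ERAS];
        int sum, t;

        /* a never-used closure of size 5, created in era 1 and destroyed in
           era 4, is void during eras 1..3: credit era 1 and debit era 4 */
        voidNew[1] += 5;
        voidNew[4] -= 5;

        /* rebuild the per-era totals with a prefix sum, as endLdvProfiling does */
        sum = 0;
        for (t = 0; t < N_ERAS; t++) {
            sum += voidNew[t];
            voidTotal[t] = sum;
            printf("era %d: voidTotal = %d\n", t, voidTotal[t]);
        }
        return 0;
    }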
63 ghc/rts/LdvProfile.h
@@ -0,0 +1,63 @@
+/* -----------------------------------------------------------------------------
+ * $Id: LdvProfile.h,v 1.1 2001/11/22 14:25:12 simonmar Exp $
+ *
+ * (c) The GHC Team, 2001
+ * Author: Sungwoo Park
+ *
+ * Lag/Drag/Void profiling.
+ *
+ * ---------------------------------------------------------------------------*/
+
+#ifndef LDVPROFILE_H
+#define LDVPROFILE_H
+
+#ifdef PROFILING
+
+#include "ProfHeap.h"
+
+void LDV_recordDead_FILL_SLOP_DYNAMIC( StgClosure *p );
+
+// Processes a closure 'c' being destroyed whose size is 'size'.
+// Make sure that LDV_recordDead() is not invoked on 'inherently used' closures
+// such as TSO; they should not be involved in computing dragNew or voidNew.
+//
+// Note: ldvTime is 0 if LDV profiling is turned off.
+// ldvTime is > 0 if LDV profiling is turned on.
+// size does not include StgProfHeader.
+//
+// Even though ldvTime is checked in both LdvCensusForDead() and
+// LdvCensusKillAll(), we still need to make sure that ldvTime is > 0 because
+// LDV_recordDead() may be called from elsewhere in the runtime system. E.g.,
+// when a thunk is replaced by an indirection object.
+
+static inline void
+LDV_recordDead( StgClosure *c, nat size )
+{
+ if (ldvTime > 0 && closureSatisfiesConstraints(c)) {
+ nat t;
+ size -= sizeofW(StgProfHeader);
+ if ((LDVW((c)) & LDV_STATE_MASK) == LDV_STATE_CREATE) {
+ t = (LDVW((c)) & LDV_CREATE_MASK) >> LDV_SHIFT;
+ if (t < ldvTime) {
+ gi[t].voidNew += (int)size;
+ gi[ldvTime].voidNew -= (int)size;
+ }
+ } else {
+ t = LDVW((c)) & LDV_LAST_MASK;
+ if (t + 1 < ldvTime) {
+ gi[t + 1].dragNew += size;
+ gi[ldvTime].dragNew -= size;
+ }
+ }
+ }
+}
+
+extern void initLdvProfiling ( void );
+extern void endLdvProfiling ( void );
+extern void LdvCensus ( void );
+extern void LdvCensusForDead ( nat );
+extern void LdvCensusKillAll ( void );
+
+#endif /* PROFILING */
+
+#endif /* LDVPROFILE_H */
20 ghc/rts/PrimOps.hc
@@ -1,5 +1,5 @@
/* -----------------------------------------------------------------------------
- * $Id: PrimOps.hc,v 1.84 2001/11/08 12:46:31 simonmar Exp $
+ * $Id: PrimOps.hc,v 1.85 2001/11/22 14:25:12 simonmar Exp $
*
* (c) The GHC Team, 1998-2000
*
@@ -420,7 +420,25 @@ FN_(finalizzeWeakzh_fast)
}
/* kill it */
+#ifdef PROFILING
+ // @LDV profiling
+ // A weak pointer is inherently used, so we do not need to call
+ // LDV_recordDead_FILL_SLOP_DYNAMIC():
+ // LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)w);
+ // or, LDV_recordDead():
+ // LDV_recordDead((StgClosure *)w, sizeofW(StgWeak) - sizeofW(StgProfHeader));
+ // Furthermore, when PROFILING is turned on, dead weak pointers are exactly as
+ // large as weak pointers, so there is no need to fill the slop, either.
+ // See stg_DEAD_WEAK_info in StgMiscClosures.hc.
+#endif
+ //
+ // Todo: maybe use SET_HDR() and remove LDV_recordCreate()?
+ //
w->header.info = &stg_DEAD_WEAK_info;
+#ifdef PROFILING
+ // @LDV profiling
+ LDV_recordCreate((StgClosure *)w);
+#endif
f = ((StgWeak *)w)->finalizer;
w->link = ((StgWeak *)w)->link;
449 ghc/rts/ProfHeap.c
@@ -1,5 +1,5 @@
/* -----------------------------------------------------------------------------
- * $Id: ProfHeap.c,v 1.25 2001/08/14 13:40:09 sewardj Exp $
+ * $Id: ProfHeap.c,v 1.26 2001/11/22 14:25:12 simonmar Exp $
*
* (c) The GHC Team, 1998-2000
*
@@ -25,6 +25,8 @@
#include "Stats.h"
#include "Hash.h"
#include "StrHash.h"
+#include "RetainerProfile.h"
+#include "LdvProfile.h"
#ifdef DEBUG_HEAP_PROF
#include "Printer.h"
@@ -95,7 +97,7 @@ strToCtr(const char *str)
for (; ctr != NULL; prev = ctr, ctr = ctr->next_bucket ) {
if (!strcmp(ctr->str, str)) {
insertHashTable( str_to_ctr, (W_)str, ctr );
-#ifdef DEBUG
+#ifdef DEBUG_CTR
fprintf(stderr,"strToCtr: existing ctr for `%s'\n",str);
#endif
return ctr;
@@ -109,7 +111,7 @@ strToCtr(const char *str)
ctr->next = all_ctrs;
all_ctrs = ctr;
-#ifdef DEBUG
+#ifdef DEBUG_CTR
fprintf(stderr,"strToCtr: new ctr for `%s'\n",str);
#endif
@@ -175,23 +177,17 @@ initHeapProfiling(void)
fprintf(hp_file, "JOB \"%s", prog_argv[0]);
-# ifdef PROFILING
- switch (RtsFlags.ProfFlags.doHeapProfile) {
- case HEAP_BY_CCS: fprintf(hp_file, " -h%c", CCchar); break;
- case HEAP_BY_MOD: fprintf(hp_file, " -h%c", MODchar); break;
- case HEAP_BY_DESCR: fprintf(hp_file, " -h%c", DESCRchar); break;
- case HEAP_BY_TYPE: fprintf(hp_file, " -h%c", TYPEchar); break;
- default: /* nothing */
+#ifdef PROFILING
+ {
+ int count;
+ for(count = 1; count < prog_argc; count++)
+ fprintf(hp_file, " %s", prog_argv[count]);
+ fprintf(hp_file, " +RTS ");
+ for(count = 0; count < rts_argc; count++)
+ fprintf(hp_file, "%s ", rts_argv[count]);
+ fprintf(hp_file, "\n");
}
- if (RtsFlags.ProfFlags.ccSelector)
- fprintf(hp_file, " -hc{%s}", RtsFlags.ProfFlags.ccSelector);
- if (RtsFlags.ProfFlags.modSelector)
- fprintf(hp_file, " -hm{%s}", RtsFlags.ProfFlags.modSelector);
- if (RtsFlags.ProfFlags.descrSelector)
- fprintf(hp_file, " -hd{%s}", RtsFlags.ProfFlags.descrSelector);
- if (RtsFlags.ProfFlags.typeSelector)
- fprintf(hp_file, " -hy{%s}", RtsFlags.ProfFlags.typeSelector);
-# endif /* PROFILING */
+#endif /* PROFILING */
fprintf(hp_file, "\"\n" );
@@ -224,6 +220,17 @@ endHeapProfiling(void)
return;
}
+#ifdef PROFILING
+ switch (RtsFlags.ProfFlags.doHeapProfile) {
+ case HEAP_BY_RETAINER:
+ endRetainerProfiling();
+ break;
+ case HEAP_BY_LDV:
+ endLdvProfiling();
+ break;
+ }
+#endif
+
seconds = mut_user_time();
fprintf(hp_file, "BEGIN_SAMPLE %0.2f\n", seconds);
fprintf(hp_file, "END_SAMPLE %0.2f\n", seconds);
@@ -417,24 +424,48 @@ clearCCSResid(CostCentreStack *ccs)
}
static void
-fprint_ccs(FILE *fp, CostCentreStack *ccs, nat components)
+fprint_ccs(FILE *fp, CostCentreStack *ccs, nat max_length)
{
- CostCentre *cc;
- CostCentreStack *prev;
+ char buf[max_length+1];
+ nat next_offset = 0;
+ nat written;
+ char *template;
+
+ // MAIN on its own gets printed as "MAIN", otherwise we ignore MAIN.
+ if (ccs == CCS_MAIN) {
+ fprintf(fp, "MAIN");
+ return;
+ }
- cc = ccs->cc;
- prev = ccs->prevStack;
+ // keep printing components of the stack until we run out of space
+ // in the buffer. If we run out of space, end with "...".
+ for (; ccs != NULL && ccs != CCS_MAIN; ccs = ccs->prevStack) {
- if (prev == NULL
- || prev->cc->is_caf != CC_IS_BORING
- || components == 1) {
- fprintf(fp,"%s",cc->label);
- return;
+ // CAF cost centres print as M.CAF, but we leave the module
+ // name out of all the others to save space.
+ if (!strcmp(ccs->cc->label,"CAF")) {
+ written = snprintf(buf+next_offset,
+ (int)max_length-3-(int)next_offset,
+ "%s.CAF", ccs->cc->module);
+ } else {
+ if (ccs->prevStack != NULL && ccs->prevStack != CCS_MAIN) {
+ template = "%s/";
+ } else {
+ template = "%s";
+ }
+ written = snprintf(buf+next_offset,
+ (int)max_length-3-(int)next_offset,
+ template, ccs->cc->label);
+ }
- } else {
- fprint_ccs(fp, ccs->prevStack,components-1);
- fprintf(fp,"/%s",cc->label);
- }
+ if (next_offset+written >= max_length-4) {
+ sprintf(buf+max_length-4, "...");
+ break;
+ } else {
+ next_offset += written;
+ }
+ }
+ fprintf(fp, "%s", buf);
}
static void
@@ -444,7 +475,8 @@ reportCCSResid(FILE *fp, CostCentreStack *ccs)
if (ccs->mem_resid != 0) {
fprintf(fp," ");
- fprint_ccs(fp,ccs,2/*print 2 components only*/);
+ // print as much of the CCS as possible in 20 chars, ending with "..."
+ fprint_ccs(fp,ccs,30);
fprintf(fp," %ld\n", ccs->mem_resid * sizeof(W_));
}
@@ -455,75 +487,190 @@ reportCCSResid(FILE *fp, CostCentreStack *ccs)
}
}
-static
-rtsBool str_matches_selector ( char* str, char* sel )
+static rtsBool
+str_matches_selector( char* str, char* sel )
{
char* p;
- /* fprintf(stderr, "str_matches_selector %s %s\n", str, sel); */
+ // fprintf(stderr, "str_matches_selector %s %s\n", str, sel);
while (1) {
- /* Compare str against wherever we've got to in sel. */
- p = str;
- while (*p != '\0' && *sel != ',' && *sel != '\0' && *p == *sel) {
- p++; sel++;
- }
- /* Match if all of str used and have reached the end of a sel
- fragment. */
- if (*p == '\0' && (*sel == ',' || *sel == '\0'))
- return rtsTrue;
-
- /* No match. Advance sel to the start of the next elem. */
- while (*sel != ',' && *sel != '\0') sel++;
- if (*sel == ',') sel++;
-
- /* Run out of sel ?? */
- if (*sel == '\0') return rtsFalse;
+ // Compare str against wherever we've got to in sel.
+ p = str;
+ while (*p != '\0' && *sel != ',' && *sel != '\0' && *p == *sel) {
+ p++; sel++;
+ }
+ // Match if all of str used and have reached the end of a sel fragment.
+ if (*p == '\0' && (*sel == ',' || *sel == '\0'))
+ return rtsTrue;
+
+ // No match. Advance sel to the start of the next elem.
+ while (*sel != ',' && *sel != '\0') sel++;
+ if (*sel == ',') sel++;
+
+ // Run out of sel?
+ if (*sel == '\0') return rtsFalse;
}
}
-/* Figure out whether a closure should be counted in this census, by
- testing against all the specified constraints. */
-static
-rtsBool satisfies_constraints ( StgClosure* p )
+// Figure out whether a closure should be counted in this census, by
+// testing against all the specified constraints.
+rtsBool
+closureSatisfiesConstraints( StgClosure* p )
{
rtsBool b;
if (RtsFlags.ProfFlags.modSelector) {
- b = str_matches_selector( ((StgClosure *)p)->header.prof.ccs->cc->module,
- RtsFlags.ProfFlags.modSelector );
- if (!b) return rtsFalse;
+ b = str_matches_selector( ((StgClosure *)p)->header.prof.ccs->cc->module,
+ RtsFlags.ProfFlags.modSelector );
+ if (!b) return rtsFalse;
}
if (RtsFlags.ProfFlags.descrSelector) {
- b = str_matches_selector( (get_itbl((StgClosure *)p))->prof.closure_desc,
- RtsFlags.ProfFlags.descrSelector );
- if (!b) return rtsFalse;
+ b = str_matches_selector( (get_itbl((StgClosure *)p))->prof.closure_desc,
+ RtsFlags.ProfFlags.descrSelector );
+ if (!b) return rtsFalse;
}
if (RtsFlags.ProfFlags.typeSelector) {
- b = str_matches_selector( (get_itbl((StgClosure *)p))->prof.closure_type,
+ b = str_matches_selector( (get_itbl((StgClosure *)p))->prof.closure_type,
RtsFlags.ProfFlags.typeSelector );
- if (!b) return rtsFalse;
+ if (!b) return rtsFalse;
}
if (RtsFlags.ProfFlags.ccSelector) {
- b = str_matches_selector( ((StgClosure *)p)->header.prof.ccs->cc->label,
- RtsFlags.ProfFlags.ccSelector );
- if (!b) return rtsFalse;
+ b = str_matches_selector( ((StgClosure *)p)->header.prof.ccs->cc->label,
+ RtsFlags.ProfFlags.ccSelector );
+ if (!b) return rtsFalse;
}
return rtsTrue;
}
#endif /* PROFILING */
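str_matches_selector treats sel as a comma-separated list of fragments and succeeds only if str equals one of them exactly (no prefix or substring matching); closureSatisfiesConstraints then ANDs together whichever of the module, description, type and cost-centre selectors were supplied. A small standalone sketch of the matching rule; matches_selector here is a local reimplementation for illustration, not the RTS symbol:

#include <stdio.h>

/* Return 1 iff str is exactly equal to one of the comma-separated
   fragments of sel, e.g. the "foo,bar" in +RTS -hc{foo,bar}. */
static int matches_selector(const char *str, const char *sel)
{
    while (1) {
        const char *p = str;
        /* compare str against the current fragment of sel */
        while (*p != '\0' && *sel != ',' && *sel != '\0' && *p == *sel) {
            p++; sel++;
        }
        /* match if all of str was used and a fragment ended here */
        if (*p == '\0' && (*sel == ',' || *sel == '\0'))
            return 1;
        /* no match: skip to the start of the next fragment */
        while (*sel != ',' && *sel != '\0') sel++;
        if (*sel == ',') sel++;
        if (*sel == '\0') return 0;   /* no fragments left */
    }
}

int main(void)
{
    printf("%d\n", matches_selector("foo", "foo,bar"));  /* 1 */
    printf("%d\n", matches_selector("baz", "foo,bar"));  /* 0 */
    printf("%d\n", matches_selector("fo",  "foo,bar"));  /* 0: prefixes do not match */
    return 0;
}

So +RTS -hc{foo,bar} counts a closure only if its cost centre's label is exactly foo or bar, and supplying several selector flags at once restricts the census to closures that satisfy all of them.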
+/* -----------------------------------------------------------------------------
+ * Code to perform a heap census.
+ * -------------------------------------------------------------------------- */
+static void
+heapCensusChain( bdescr *bd )
+{
+ StgPtr p;
+ StgInfoTable *info;
+ nat size;
+#ifdef PROFILING
+ nat real_size;
+#endif
-static double time_of_last_heapCensus = 0.0;
+ for (; bd != NULL; bd = bd->link) {
+ p = bd->start;
+ while (p < bd->free) {
+ info = get_itbl((StgClosure *)p);
+
+ switch (info->type) {
+
+ case CONSTR:
+ case BCO:
+ case FUN:
+ case THUNK:
+ case IND_PERM:
+ case IND_OLDGEN_PERM:
+ case CAF_BLACKHOLE:
+ case SE_CAF_BLACKHOLE:
+ case SE_BLACKHOLE:
+ case BLACKHOLE:
+ case BLACKHOLE_BQ:
+ case WEAK:
+ case FOREIGN:
+ case STABLE_NAME:
+ case MVAR:
+ case MUT_VAR:
+ case MUT_CONS:
+ case CONSTR_INTLIKE:
+ case CONSTR_CHARLIKE:
+ case FUN_1_0:
+ case FUN_0_1:
+ case FUN_1_1:
+ case FUN_0_2:
+ case FUN_2_0:
+ case THUNK_1_1:
+ case THUNK_0_2:
+ case THUNK_2_0:
+ case CONSTR_1_0:
+ case CONSTR_0_1:
+ case CONSTR_1_1:
+ case CONSTR_0_2:
+ case CONSTR_2_0:
+ size = sizeW_fromITBL(info);
+ break;
+
+ case THUNK_1_0: /* ToDo - shouldn't be here */
+ case THUNK_0_1: /* " ditto " */
+ case THUNK_SELECTOR:
+ size = sizeofW(StgHeader) + MIN_UPD_SIZE;
+ break;
+
+ case PAP:
+ case AP_UPD:
+ size = pap_sizeW((StgPAP *)p);
+ break;
+
+ case ARR_WORDS:
+ size = arr_words_sizeW(stgCast(StgArrWords*,p));
+ break;
+
+ case MUT_ARR_PTRS:
+ case MUT_ARR_PTRS_FROZEN:
+ size = mut_arr_ptrs_sizeW((StgMutArrPtrs *)p);
+ break;
+
+ case TSO:
+ size = tso_sizeW((StgTSO *)p);
+ break;
+
+ default:
+ barf("heapCensus");
+ }
+
+#ifdef DEBUG_HEAP_PROF
+ switch (RtsFlags.ProfFlags.doHeapProfile) {
+ case HEAP_BY_INFOPTR:
+ add_data((void *)(*p), size * sizeof(W_));
+ break;
+ case HEAP_BY_CLOSURE_TYPE:
+ closure_types[info->type] += size * sizeof(W_);
+ break;
+ }
+#endif
+
+#ifdef PROFILING
+ // subtract the profiling overhead
+ real_size = size - sizeofW(StgProfHeader);
+
+ if (closureSatisfiesConstraints((StgClosure*)p)) {
+ switch (RtsFlags.ProfFlags.doHeapProfile) {
+ case HEAP_BY_CCS:
+ ((StgClosure *)p)->header.prof.ccs->mem_resid += real_size;
+ break;
+ case HEAP_BY_MOD:
+ strToCtr(((StgClosure *)p)->header.prof.ccs->cc->module)
+ ->mem_resid += real_size;
+ break;
+ case HEAP_BY_DESCR:
+ strToCtr(get_itbl(((StgClosure *)p))->prof.closure_desc)->mem_resid
+ += real_size;
+ break;
+ case HEAP_BY_TYPE:
+ strToCtr(get_itbl(((StgClosure *)p))->prof.closure_type)->mem_resid
+ += real_size;
+ break;
+ default:
+ barf("heapCensus; doHeapProfile");
+ }
+ }
+#endif
+ p += size;
+ }
+ }
+}
void
-heapCensus(void)
+heapCensus( void )
{
- bdescr *bd;
- const StgInfoTable *info;
StgDouble time;
- nat size;
- StgPtr p;
-#ifdef PROFILING
- nat elapsed;
-#endif
+ nat g, s;
#ifdef DEBUG_HEAP_PROF
switch (RtsFlags.ProfFlags.doHeapProfile) {
@@ -542,21 +689,6 @@ heapCensus(void)
#endif
#ifdef PROFILING
- /*
- * We only continue iff we've waited long enough,
- * otherwise, we just dont do the census.
- */
-
- time = mut_user_time_during_GC();
- elapsed = (time - time_of_last_heapCensus) * 1000;
- if (elapsed < RtsFlags.ProfFlags.profileFrequency) {
- return;
- }
- time_of_last_heapCensus = time;
-#endif
-
-
-#ifdef PROFILING
switch (RtsFlags.ProfFlags.doHeapProfile) {
case NO_HEAP_PROFILING:
return;
@@ -574,136 +706,27 @@ heapCensus(void)
}
#endif
- /* Only do heap profiling in a two-space heap */
- ASSERT(RtsFlags.GcFlags.generations == 1);
- bd = g0s0->to_blocks;
-
+ time = mut_user_time_during_GC();
fprintf(hp_file, "BEGIN_SAMPLE %0.2f\n", time);
-
- while (bd != NULL) {
- p = bd->start;
- while (p < bd->free) {
- info = get_itbl((StgClosure *)p);
-
- switch (info->type) {
-
- case CONSTR:
- if (((StgClosure *)p)->header.info == &stg_DEAD_WEAK_info
- && !(LOOKS_LIKE_GHC_INFO(*(p + sizeW_fromITBL(info))))) {
- size = sizeofW(StgWeak);
- break;
- }
- /* else, fall through... */
-
- case BCO:
- case FUN:
- case THUNK:
- case IND_PERM:
- case IND_OLDGEN_PERM:
- case CAF_BLACKHOLE:
- case SE_CAF_BLACKHOLE:
- case SE_BLACKHOLE:
- case BLACKHOLE:
- case BLACKHOLE_BQ:
- case WEAK:
- case FOREIGN:
- case STABLE_NAME:
- case MVAR:
- case MUT_VAR:
- case CONSTR_INTLIKE:
- case CONSTR_CHARLIKE:
- case FUN_1_0:
- case FUN_0_1:
- case FUN_1_1:
- case FUN_0_2:
- case FUN_2_0:
- case THUNK_1_1:
- case THUNK_0_2:
- case THUNK_2_0:
- case CONSTR_1_0:
- case CONSTR_0_1:
- case CONSTR_1_1:
- case CONSTR_0_2:
- case CONSTR_2_0:
- size = sizeW_fromITBL(info);
- break;
-
- case THUNK_1_0: /* ToDo - shouldn't be here */
- case THUNK_0_1: /* " ditto " */
- case THUNK_SELECTOR:
- size = sizeofW(StgHeader) + MIN_UPD_SIZE;
- break;
-
- case AP_UPD: /* we can treat this as being the same as a PAP */
- case PAP:
- size = pap_sizeW((StgPAP *)p);
- break;
-
- case ARR_WORDS:
- size = arr_words_sizeW(stgCast(StgArrWords*,p));
- break;
-
- case MUT_ARR_PTRS:
- case MUT_ARR_PTRS_FROZEN:
- size = mut_arr_ptrs_sizeW((StgMutArrPtrs *)p);
- break;
-
- case TSO:
- size = tso_sizeW((StgTSO *)p);
- break;
-
- default:
- barf("heapCensus");
- }
-
-#ifdef DEBUG_HEAP_PROF
- switch (RtsFlags.ProfFlags.doHeapProfile) {
- case HEAP_BY_INFOPTR:
- add_data((void *)(*p), size * sizeof(W_));
- break;
- case HEAP_BY_CLOSURE_TYPE:
- closure_types[info->type] += size * sizeof(W_);
- break;
- }
-#endif
-# ifdef PROFILING
- if (satisfies_constraints((StgClosure*)p)) {
- switch (RtsFlags.ProfFlags.doHeapProfile) {
- case HEAP_BY_CCS:
- ((StgClosure *)p)->header.prof.ccs->mem_resid += size;
- break;
- case HEAP_BY_MOD:
- strToCtr(((StgClosure *)p)->header.prof.ccs->cc->module)
- ->mem_resid += size;
- break;
- case HEAP_BY_DESCR:
- strToCtr(get_itbl(((StgClosure *)p))->prof.closure_desc)->mem_resid
- += size;
- break;
- case HEAP_BY_TYPE:
- strToCtr(get_itbl(((StgClosure *)p))->prof.closure_type)->mem_resid
- += size;
- break;
- default:
- barf("heapCensus; doHeapProfile");
- }
+ if (RtsFlags.GcFlags.generations == 1) {
+ heapCensusChain( g0s0->to_blocks );
+ } else {
+ for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
+ for (s = 0; s < generations[g].n_steps; s++) {
+ heapCensusChain( generations[g].steps[s].blocks );
+ }
}
-# endif
-
- p += size;
- }
- bd = bd->link;
}
#ifdef DEBUG_HEAP_PROF
switch (RtsFlags.ProfFlags.doHeapProfile) {
case HEAP_BY_INFOPTR:
- fprint_data(hp_file);
- break;
+ fprint_data(hp_file);
+ break;
case HEAP_BY_CLOSURE_TYPE:
- fprint_closure_types(hp_file);
- break;
+ fprint_closure_types(hp_file);
+ break;
}
#endif
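Overall, heapCensus no longer insists on a two-space heap: with a single generation it still walks g0s0->to_blocks, and otherwise it visits the block list of every step of every generation, while heapCensusChain advances from closure to closure by each closure's size in words and subtracts sizeofW(StgProfHeader) so the reported residency excludes the extra profiling header. A toy model of that block/object walk; Block, obj_size_words and censusChain below are invented stand-ins for bdescr and the info-table based size calculation:

#include <stdio.h>
#include <stddef.h>

typedef unsigned long W;                     /* stand-in for a heap word */

typedef struct Block {
    W *start, *free;                         /* objects occupy [start, free) */
    struct Block *link;                      /* next block in the chain */
} Block;

/* In this toy model the first word of an object is its total size in
   words; the real census derives the size from the closure's info table. */
static size_t obj_size_words(const W *p) { return (size_t)p[0]; }

static size_t censusChain(const Block *bd)
{
    size_t live = 0;
    for (; bd != NULL; bd = bd->link) {
        for (const W *p = bd->start; p < bd->free; p += obj_size_words(p)) {
            live += obj_size_words(p);       /* accumulate residency in words */
        }
    }
    return live;
}

int main(void)
{
    W heap[9] = { 3, 0, 0,   2, 0,   4, 0, 0, 0 };   /* objects of 3, 2 and 4 words */
    Block b = { heap, heap + 9, NULL };
    printf("live words = %zu\n", censusChain(&b));   /* prints: live words = 9 */
    return 0;
}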
9 ghc/rts/ProfHeap.h
@@ -1,5 +1,5 @@
/* -----------------------------------------------------------------------------
- * $Id: ProfHeap.h,v 1.1 1999/09/15 13:46:29 simonmar Exp $
+ * $Id: ProfHeap.h,v 1.2 2001/11/22 14:25:12 simonmar Exp $
*
* (c) The GHC Team, 1998-1999
*
@@ -8,6 +8,7 @@
* ---------------------------------------------------------------------------*/
-void heapCensus(void);
-extern nat initHeapProfiling(void);
-void endHeapProfiling(void);
+extern void heapCensus( void );
+extern nat initHeapProfiling( void );
+extern void endHeapProfiling( void );
+extern rtsBool closureSatisfiesConstraints( StgClosure* p );
74 ghc/rts/Profiling.c
@@ -1,5 +1,5 @@
/* -----------------------------------------------------------------------------
- * $Id: Profiling.c,v 1.24 2001/10/18 14:41:01 simonmar Exp $
+ * $Id: Profiling.c,v 1.25 2001/11/22 14:25:12 simonmar Exp $
*
* (c) The GHC Team, 1998-2000
*
@@ -19,6 +19,8 @@
#include "Itimer.h"
#include "ProfHeap.h"
#include "Arena.h"
+#include "RetainerProfile.h"
+#include "LdvProfile.h"
/*
* Profiling allocation arena.
@@ -144,9 +146,6 @@ static IndexTable * AddToIndexTable ( IndexTable *, CostCentreStack *,
-#ifdef DEBUG
-static void printCCS ( CostCentreStack *ccs );
-#endif
static void initTimeProfiling ( void );
static void initProfilingLogFile( void );
@@ -195,6 +194,15 @@ initProfiling1 (void)
/* cost centres are registered by the per-module
* initialisation code now...
*/
+
+ switch (RtsFlags.ProfFlags.doHeapProfile) {
+ case HEAP_BY_RETAINER:
+ initRetainerProfiling();
+ break;
+ case HEAP_BY_LDV:
+ initLdvProfiling();
+ break;
+ }
}
void
@@ -242,6 +250,13 @@ initProfilingLogFile(void)
if ((prof_file = fopen(prof_filename, "w")) == NULL) {
fprintf(stderr, "Can't open profiling report file %s\n", prof_filename);
RtsFlags.CcFlags