Permalink
Browse files

New implementation of BLACKHOLEs

This replaces the global blackhole_queue with a clever scheme that
enables us to queue up blocked threads on the closure that they are
blocked on, while still avoiding atomic instructions in the common
case.

Advantages:

 - gets rid of a locked global data structure and some tricky GC code
   (replacing it with some per-thread data structures and different
   tricky GC code :)

 - wakeups are more prompt: parallel/concurrent performance should
   benefit.  I haven't seen anything dramatic in the parallel
   benchmarks so far, but a couple of threading benchmarks do improve
   a bit.

 - waking up a thread blocked on a blackhole is now O(1) (e.g. if
   it is the target of throwTo).

 - less sharing and better separation of Capabilities: communication
   is done with messages, the data structures are strictly owned by a
   Capability and cannot be modified except by sending messages.

 - this change will utlimately enable us to do more intelligent
   scheduling when threads block on each other.  This is what started
   off the whole thing, but it isn't done yet (#3838).

I'll be documenting all this on the wiki in due course.
  • Loading branch information...
1 parent 79957d7 commit 5d52d9b64c21dcf77849866584744722f8121389 @simonmar simonmar committed Mar 29, 2010
View
@@ -58,6 +58,7 @@ module CLabel (
mkSplitMarkerLabel,
mkDirty_MUT_VAR_Label,
mkUpdInfoLabel,
+ mkBHUpdInfoLabel,
mkIndStaticInfoLabel,
mkMainCapabilityLabel,
mkMAP_FROZEN_infoLabel,
@@ -400,6 +401,7 @@ mkStaticConEntryLabel name c = IdLabel name c StaticConEntry
mkSplitMarkerLabel = CmmLabel rtsPackageId (fsLit "__stg_split_marker") CmmCode
mkDirty_MUT_VAR_Label = CmmLabel rtsPackageId (fsLit "dirty_MUT_VAR") CmmCode
mkUpdInfoLabel = CmmLabel rtsPackageId (fsLit "stg_upd_frame") CmmInfo
+mkBHUpdInfoLabel = CmmLabel rtsPackageId (fsLit "stg_bh_upd_frame" ) CmmInfo
mkIndStaticInfoLabel = CmmLabel rtsPackageId (fsLit "stg_IND_STATIC") CmmInfo
mkMainCapabilityLabel = CmmLabel rtsPackageId (fsLit "MainCapability") CmmData
mkMAP_FROZEN_infoLabel = CmmLabel rtsPackageId (fsLit "stg_MUT_ARR_PTRS_FROZEN0") CmmInfo
@@ -284,7 +284,6 @@ getSequelAmode
OnStack -> do { sp_rel <- getSpRelOffset virt_sp
; returnFC (CmmLoad sp_rel bWord) }
- UpdateCode -> returnFC (CmmLit (CmmLabel mkUpdInfoLabel))
CaseAlts lbl _ _ -> returnFC (CmmLit (CmmLabel lbl))
}
@@ -474,7 +474,12 @@ emitBlackHoleCode is_single_entry = do
then do
tickyBlackHole (not is_single_entry)
let bh_info = CmmReg (CmmGlobal EagerBlackholeInfo)
- stmtC (CmmStore (CmmReg nodeReg) bh_info)
+ stmtsC [
+ CmmStore (cmmOffsetW (CmmReg nodeReg) fixedHdrSize)
+ (CmmReg (CmmGlobal CurrentTSO)),
+ CmmCall (CmmPrim MO_WriteBarrier) [] [] CmmUnsafe CmmMayReturn,
+ CmmStore (CmmReg nodeReg) bh_info
+ ]
else
nopC
\end{code}
@@ -489,17 +494,23 @@ setupUpdate closure_info code
= code
| not (isStaticClosure closure_info)
- = if closureUpdReqd closure_info
- then do { tickyPushUpdateFrame; pushUpdateFrame (CmmReg nodeReg) code }
- else do { tickyUpdateFrameOmitted; code }
-
+ = do
+ if not (closureUpdReqd closure_info)
+ then do tickyUpdateFrameOmitted; code
+ else do
+ tickyPushUpdateFrame
+ dflags <- getDynFlags
+ if not opt_SccProfilingOn && dopt Opt_EagerBlackHoling dflags
+ then pushBHUpdateFrame (CmmReg nodeReg) code
+ else pushUpdateFrame (CmmReg nodeReg) code
+
| otherwise -- A static closure
= do { tickyUpdateBhCaf closure_info
; if closureUpdReqd closure_info
then do -- Blackhole the (updatable) CAF:
{ upd_closure <- link_caf closure_info True
- ; pushUpdateFrame upd_closure code }
+ ; pushBHUpdateFrame upd_closure code }
else do
{ -- krc: removed some ticky-related code here.
; tickyUpdateFrameOmitted
@@ -553,7 +564,8 @@ link_caf cl_info _is_upd = do
{ -- Alloc black hole specifying CC_HDR(Node) as the cost centre
; let use_cc = costCentreFrom (CmmReg nodeReg)
blame_cc = use_cc
- ; hp_offset <- allocDynClosure bh_cl_info use_cc blame_cc []
+ tso = CmmReg (CmmGlobal CurrentTSO)
+ ; hp_offset <- allocDynClosure bh_cl_info use_cc blame_cc [(tso,fixedHdrSize)]
; hp_rel <- getHpRelOffset hp_offset
-- Call the RTS function newCAF to add the CAF to the CafList
@@ -169,7 +169,6 @@ block.
\begin{code}
data Sequel
= OnStack -- Continuation is on the stack
- | UpdateCode -- Continuation is update
| CaseAlts
CLabel -- Jump to this; if the continuation is for a vectored
@@ -17,7 +17,7 @@ module CgStackery (
setStackFrame, getStackFrame,
mkVirtStkOffsets, mkStkAmodes,
freeStackSlots,
- pushUpdateFrame, emitPushUpdateFrame,
+ pushUpdateFrame, pushBHUpdateFrame, emitPushUpdateFrame,
) where
#include "HsVersions.h"
@@ -265,6 +265,14 @@ to reflect the frame pushed.
\begin{code}
pushUpdateFrame :: CmmExpr -> Code -> Code
pushUpdateFrame updatee code
+ = pushSpecUpdateFrame mkUpdInfoLabel updatee code
+
+pushBHUpdateFrame :: CmmExpr -> Code -> Code
+pushBHUpdateFrame updatee code
+ = pushSpecUpdateFrame mkBHUpdInfoLabel updatee code
+
+pushSpecUpdateFrame :: CLabel -> CmmExpr -> Code -> Code
+pushSpecUpdateFrame lbl updatee code
= do {
when debugIsOn $ do
{ EndOfBlockInfo _ sequel <- getEndOfBlockInfo ;
@@ -277,15 +285,25 @@ pushUpdateFrame updatee code
-- The location of the lowest-address
-- word of the update frame itself
- ; setEndOfBlockInfo (EndOfBlockInfo vsp UpdateCode) $
- do { emitPushUpdateFrame frame_addr updatee
+ -- NB. we used to set the Sequel to 'UpdateCode' so
+ -- that we could jump directly to the update code if
+ -- we know that the next frame on the stack is an
+ -- update frame. However, the RTS can sometimes
+ -- change an update frame into something else (see
+ -- e.g. Note [upd-black-hole] in rts/sm/Scav.c), so we
+ -- no longer make this assumption.
+ ; setEndOfBlockInfo (EndOfBlockInfo vsp OnStack) $
+ do { emitSpecPushUpdateFrame lbl frame_addr updatee
; code }
}
emitPushUpdateFrame :: CmmExpr -> CmmExpr -> Code
-emitPushUpdateFrame frame_addr updatee = do
+emitPushUpdateFrame = emitSpecPushUpdateFrame mkUpdInfoLabel
+
+emitSpecPushUpdateFrame :: CLabel -> CmmExpr -> CmmExpr -> Code
+emitSpecPushUpdateFrame lbl frame_addr updatee = do
stmtsC [ -- Set the info word
- CmmStore frame_addr (mkLblExpr mkUpdInfoLabel)
+ CmmStore frame_addr (mkLblExpr lbl)
, -- And the updatee
CmmStore (cmmOffsetB frame_addr off_updatee) updatee ]
initUpdFrameProf frame_addr
View
@@ -106,10 +106,18 @@ void _assertFail(const char *filename, unsigned int linenum)
else \
_assertFail(__FILE__, __LINE__)
+#define CHECKM(predicate, msg, ...) \
+ if (predicate) \
+ /*null*/; \
+ else \
+ barf(msg, ##__VA_ARGS__)
+
#ifndef DEBUG
#define ASSERT(predicate) /* nothing */
+#define ASSERTM(predicate,msg,...) /* nothing */
#else
#define ASSERT(predicate) CHECK(predicate)
+#define ASSERTM(predicate,msg,...) CHECKM(predicate,msg,##__VA_ARGS__)
#endif /* DEBUG */
/*
@@ -291,6 +291,7 @@ main(int argc, char *argv[])
closure_field(StgTSO, trec);
closure_field(StgTSO, flags);
closure_field(StgTSO, dirty);
+ closure_field(StgTSO, bq);
closure_field_("StgTSO_CCCS", StgTSO, prof.CCCS);
tso_field(StgTSO, sp);
tso_field_offset(StgTSO, stack);
@@ -382,6 +383,17 @@ main(int argc, char *argv[])
closure_size(StgStableName);
closure_field(StgStableName,sn);
+ closure_size(StgBlockingQueue);
+ closure_field(StgBlockingQueue, bh);
+ closure_field(StgBlockingQueue, owner);
+ closure_field(StgBlockingQueue, queue);
+ closure_field(StgBlockingQueue, link);
+
+ closure_size(MessageBlackHole);
+ closure_field(MessageBlackHole, link);
+ closure_field(MessageBlackHole, tso);
+ closure_field(MessageBlackHole, bh);
+
struct_field_("RtsFlags_ProfFlags_showCCSOnException",
RTS_FLAGS, ProfFlags.showCCSOnException);
struct_field_("RtsFlags_DebugFlags_apply",
@@ -129,6 +129,12 @@
SET_HDR(c,info,costCentreStack); \
(c)->words = n_words;
+// Use when changing a closure from one kind to another
+#define OVERWRITE_INFO(c, new_info) \
+ LDV_RECORD_DEAD_FILL_SLOP_DYNAMIC((StgClosure *)(c)); \
+ SET_INFO((c), (new_info)); \
+ LDV_RECORD_CREATE(c);
+
/* -----------------------------------------------------------------------------
How to get hold of the static link field for a static closure.
-------------------------------------------------------------------------- */
@@ -249,7 +255,7 @@ INLINE_HEADER StgOffset THUNK_SELECTOR_sizeW ( void )
{ return sizeofW(StgSelector); }
INLINE_HEADER StgOffset BLACKHOLE_sizeW ( void )
-{ return sizeofW(StgHeader)+MIN_PAYLOAD_SIZE; }
+{ return sizeofW(StgInd); } // a BLACKHOLE is a kind of indirection
/* --------------------------------------------------------------------------
Sizes of closures
@@ -62,7 +62,7 @@
#define UPDATE_FRAME 38
#define CATCH_FRAME 39
#define STOP_FRAME 40
-#define CAF_BLACKHOLE 41
+#define BLOCKING_QUEUE 41
#define BLACKHOLE 42
#define MVAR_CLEAN 43
#define MVAR_DIRTY 44
@@ -127,6 +127,14 @@ typedef struct {
StgInfoTable *saved_info;
} StgIndStatic;
+typedef struct StgBlockingQueue_ {
+ StgHeader header;
+ struct StgBlockingQueue_ *link; // here so it looks like an IND
+ StgClosure *bh; // the BLACKHOLE
+ StgTSO *owner;
+ struct MessageBlackHole_ *queue;
+} StgBlockingQueue;
+
typedef struct {
StgHeader header;
StgWord words;
@@ -433,10 +441,17 @@ typedef struct MessageWakeup_ {
typedef struct MessageThrowTo_ {
StgHeader header;
- Message *link;
+ struct MessageThrowTo_ *link;
StgTSO *source;
StgTSO *target;
StgClosure *exception;
} MessageThrowTo;
+typedef struct MessageBlackHole_ {
+ StgHeader header;
+ struct MessageBlackHole_ *link;
+ StgTSO *tso;
+ StgClosure *bh;
+} MessageBlackHole;
+
#endif /* RTS_STORAGE_CLOSURES_H */
View
@@ -46,6 +46,7 @@ typedef struct {
/* Reason for thread being blocked. See comment above struct StgTso_. */
typedef union {
StgClosure *closure;
+ struct MessageBlackHole_ *bh;
struct MessageThrowTo_ *throwto;
struct MessageWakeup_ *wakeup;
StgInt fd; /* StgInt instead of int, so that it's the same size as the ptrs */
@@ -78,12 +79,17 @@ typedef struct StgTSO_ {
*/
struct StgTSO_* _link;
/*
+ Currently used for linking TSOs on:
+ * cap->run_queue_{hd,tl}
+ * MVAR queue
+ * (non-THREADED_RTS); the blocked_queue
+ * and pointing to the relocated version of a ThreadRelocated
+
NOTE!!! do not modify _link directly, it is subject to
a write barrier for generational GC. Instead use the
setTSOLink() function. Exceptions to this rule are:
* setting the link field to END_TSO_QUEUE
- * putting a TSO on the blackhole_queue
* setting the link field of the currently running TSO, as it
will already be dirty.
*/
@@ -127,6 +133,12 @@ typedef struct StgTSO_ {
*/
struct MessageThrowTo_ * blocked_exceptions;
+ /*
+ A list of StgBlockingQueue objects, representing threads blocked
+ on thunks that are under evaluation by this thread.
+ */
+ struct StgBlockingQueue_ *bq;
+
#ifdef TICKY_TICKY
/* TICKY-specific stuff would go here. */
#endif
@@ -152,6 +164,18 @@ typedef struct StgTSO_ {
void dirty_TSO (Capability *cap, StgTSO *tso);
void setTSOLink (Capability *cap, StgTSO *tso, StgTSO *target);
+// Apply to a TSO before looking at it if you are not sure whether it
+// might be ThreadRelocated or not (basically, that's most of the time
+// unless the TSO is the current TSO).
+//
+INLINE_HEADER StgTSO * deRefTSO(StgTSO *tso)
+{
+ while (tso->what_next == ThreadRelocated) {
+ tso = tso->_link;
+ }
+ return tso;
+}
+
/* -----------------------------------------------------------------------------
Invariants:
@@ -44,6 +44,7 @@
/* Stack frames */
RTS_RET_INFO(stg_upd_frame_info);
+RTS_RET_INFO(stg_bh_upd_frame_info);
RTS_RET_INFO(stg_marked_upd_frame_info);
RTS_RET_INFO(stg_noupd_frame_info);
RTS_RET_INFO(stg_catch_frame_info);
@@ -54,6 +55,7 @@ RTS_RET_INFO(stg_catch_stm_frame_info);
RTS_RET_INFO(stg_unblockAsyncExceptionszh_ret_info);
RTS_ENTRY(stg_upd_frame_ret);
+RTS_ENTRY(stg_bh_upd_frame_ret);
RTS_ENTRY(stg_marked_upd_frame_ret);
// RTS_FUN(stg_interp_constr_entry);
@@ -90,12 +92,12 @@ RTS_INFO(stg_IND_STATIC_info);
RTS_INFO(stg_IND_PERM_info);
RTS_INFO(stg_IND_OLDGEN_info);
RTS_INFO(stg_IND_OLDGEN_PERM_info);
-RTS_INFO(stg_CAF_UNENTERED_info);
-RTS_INFO(stg_CAF_ENTERED_info);
-RTS_INFO(stg_WHITEHOLE_info);
RTS_INFO(stg_BLACKHOLE_info);
-RTS_INFO(__stg_EAGER_BLACKHOLE_info);
RTS_INFO(stg_CAF_BLACKHOLE_info);
+RTS_INFO(__stg_EAGER_BLACKHOLE_info);
+RTS_INFO(stg_WHITEHOLE_info);
+RTS_INFO(stg_BLOCKING_QUEUE_CLEAN_info);
+RTS_INFO(stg_BLOCKING_QUEUE_DIRTY_info);
RTS_FUN_INFO(stg_BCO_info);
RTS_INFO(stg_EVACUATED_info);
@@ -115,7 +117,9 @@ RTS_INFO(stg_MUT_VAR_CLEAN_info);
RTS_INFO(stg_MUT_VAR_DIRTY_info);
RTS_INFO(stg_END_TSO_QUEUE_info);
RTS_INFO(stg_MSG_WAKEUP_info);
+RTS_INFO(stg_MSG_TRY_WAKEUP_info);
RTS_INFO(stg_MSG_THROWTO_info);
+RTS_INFO(stg_MSG_BLACKHOLE_info);
RTS_INFO(stg_MUT_CONS_info);
RTS_INFO(stg_catch_info);
RTS_INFO(stg_PAP_info);
@@ -142,12 +146,10 @@ RTS_ENTRY(stg_IND_STATIC_entry);
RTS_ENTRY(stg_IND_PERM_entry);
RTS_ENTRY(stg_IND_OLDGEN_entry);
RTS_ENTRY(stg_IND_OLDGEN_PERM_entry);
-RTS_ENTRY(stg_CAF_UNENTERED_entry);
-RTS_ENTRY(stg_CAF_ENTERED_entry);
RTS_ENTRY(stg_WHITEHOLE_entry);
RTS_ENTRY(stg_BLACKHOLE_entry);
-RTS_ENTRY(__stg_EAGER_BLACKHOLE_entry);
RTS_ENTRY(stg_CAF_BLACKHOLE_entry);
+RTS_ENTRY(__stg_EAGER_BLACKHOLE_entry);
RTS_ENTRY(stg_BCO_entry);
RTS_ENTRY(stg_EVACUATED_entry);
RTS_ENTRY(stg_WEAK_entry);
@@ -166,7 +168,9 @@ RTS_ENTRY(stg_MUT_VAR_CLEAN_entry);
RTS_ENTRY(stg_MUT_VAR_DIRTY_entry);
RTS_ENTRY(stg_END_TSO_QUEUE_entry);
RTS_ENTRY(stg_MSG_WAKEUP_entry);
+RTS_ENTRY(stg_MSG_TRY_WAKEUP_entry);
RTS_ENTRY(stg_MSG_THROWTO_entry);
+RTS_ENTRY(stg_MSG_BLACKHOLE_entry);
RTS_ENTRY(stg_MUT_CONS_entry);
RTS_ENTRY(stg_catch_entry);
RTS_ENTRY(stg_PAP_entry);
@@ -404,6 +408,8 @@ RTS_FUN(stg_PAP_apply);
RTS_RET_INFO(stg_enter_info);
RTS_ENTRY(stg_enter_ret);
+RTS_RET_INFO(stg_enter_checkbh_info);
+RTS_ENTRY(stg_enter_checkbh_ret);
RTS_RET_INFO(stg_gc_void_info);
RTS_ENTRY(stg_gc_void_ret);
Oops, something went wrong.

0 comments on commit 5d52d9b

Please sign in to comment.