Skip to content

Commit

Permalink
[project @ 2005-05-10 13:25:41 by simonmar]
Browse files Browse the repository at this point in the history
Two SMP-related changes:

  - New storage manager interface:

    bdescr *allocateLocal(StgRegTable *reg, nat words)

    which allocates from the current thread's nursery (being careful
    not to clash with the heap pointer).  It can do this without
    taking any locks; the lock only has to be taken if a block needs
    to be allocated.  allocateLocal() is now used instead of allocate()
    in a few PrimOps.

    This removes locks from most Integer operations, cutting down
    the overhead for SMP a bit more.

    To make this work, we have to be able to grab the current thread's
    Capability out of thin air (i.e. when called from GMP), so the
    Capability subsystem needs to keep a hash from thread IDs to
    Capabilities.

  - Small MVar optimisation: instead of taking the global
    storage-manager lock, do our own locking of MVars with a bit of
    inline assembly (x86 only for now).
  • Loading branch information
simonmar committed May 10, 2005
1 parent 24928a5 commit bf82198
Show file tree
Hide file tree
Showing 13 changed files with 246 additions and 147 deletions.
5 changes: 4 additions & 1 deletion ghc/includes/Cmm.h
Expand Up @@ -317,8 +317,11 @@
HP_CHK_GEN(alloc,liveness,reentry); \
TICK_ALLOC_HEAP_NOCTR(alloc);

// allocateLocal() allocates from the nursery, so we check to see
// whether the nursery is nearly empty in any function that uses
// allocateLocal() - this includes many of the primops.
#define MAYBE_GC(liveness,reentry) \
if (CInt[alloc_blocks] >= CInt[alloc_blocks_lim]) { \
if (bdescr_link(CurrentNursery) == NULL) { \
R9 = liveness; \
R10 = reentry; \
jump stg_gc_gen_hp; \
Expand Down
3 changes: 2 additions & 1 deletion ghc/includes/Regs.h
Expand Up @@ -87,7 +87,8 @@ typedef struct StgRegTable_ {
StgPtr rHpLim;
struct StgTSO_ *rCurrentTSO;
struct step_ *rNursery;
struct bdescr_ *rCurrentNursery;
struct bdescr_ *rCurrentNursery; /* Hp/HpLim point into this block */
struct bdescr_ *rCurrentAlloc; /* for allocation using allocate() */
StgWord rHpAlloc; /* number of *bytes* being allocated in heap */
#if defined(SMP) || defined(PAR)
StgSparkPool rSparks; /* per-task spark pool */
Expand Down
5 changes: 0 additions & 5 deletions ghc/includes/Rts.h
Expand Up @@ -110,13 +110,8 @@ extern void _assertFail (char *, unsigned int);
#include "Parallel.h"

/* STG/Optimised-C related stuff */
#include "SMP.h"
#include "Block.h"

#ifdef SMP
#include <pthread.h>
#endif

/* GNU mp library */
#include "gmp.h"

Expand Down
75 changes: 26 additions & 49 deletions ghc/includes/SMP.h
@@ -1,6 +1,6 @@
/* ----------------------------------------------------------------------------
*
* (c) The GHC Team, 1999
* (c) The GHC Team, 2005
*
* Macros for SMP support
*
Expand All @@ -23,60 +23,37 @@
#error Build options incompatible with SMP.
#endif

/*
* CMPXCHG - this instruction is the standard "test & set". We use it
* for locking closures in the thunk and blackhole entry code. If the
* closure is already locked, or has an unexpected info pointer
* (because another thread is altering it in parallel), we just jump
* to the new entry point.
*/
#if defined(i386_HOST_ARCH) && defined(TABLES_NEXT_TO_CODE)
#define CMPXCHG(p, cmp, new) \
__asm__ __volatile__ ( \
"lock ; cmpxchg %1, %0\n" \
"\tje 1f\n" \
"\tjmp *%%eax\n" \
"\t1:\n" \
: /* no outputs */ \
: "m" (p), "r" (new), "r" (cmp) \
)

/*
* XCHG - the atomic exchange instruction. Used for locking closures
* during updates (see LOCK_CLOSURE below) and the MVar primops.
*/
#define XCHG(reg, obj) \
__asm__ __volatile__ ( \
"xchgl %1,%0" \
:"+r" (reg), "+m" (obj) \
: /* no input-only operands */ \
)

INLINE_HEADER StgWord
xchg(StgPtr p, StgWord w)
{
StgWord result;
result = w;
__asm__ __volatile__ (
"xchgl %1,%0"
:"+r" (result), "+m" (*p)
: /* no input-only operands */
);
return result;
}

INLINE_HEADER StgInfoTable *
lockClosure(StgClosure *p)
{
StgWord info;
#if 0
do {
info = xchg((P_)&p->header.info, (W_)&stg_WHITEHOLE_info);
if (info != (W_)&stg_WHITEHOLE_info) return (StgInfoTable *)info;
yieldThread();
} while (1);
#else
#error SMP macros not defined for this architecture
info = p->header.info;
#endif

/*
* LOCK_CLOSURE locks the specified closure, busy waiting for any
* existing locks to be cleared.
*/
#define LOCK_CLOSURE(c) \
({ \
const StgInfoTable *__info; \
__info = &stg_WHITEHOLE_info; \
do { \
XCHG(__info,((StgClosure *)(c))->header.info); \
} while (__info == &stg_WHITEHOLE_info); \
__info; \
})

#define LOCK_THUNK(__info) \
CMPXCHG(R1.cl->header.info, __info, &stg_WHITEHOLE_info);

#else /* !SMP */

#define LOCK_CLOSURE(c) /* nothing */
#define LOCK_THUNK(__info) /* nothing */
}

#endif /* SMP */

Expand Down
2 changes: 2 additions & 0 deletions ghc/includes/StgMiscClosures.h
Expand Up @@ -95,6 +95,7 @@ RTS_INFO(stg_IND_OLDGEN_info);
RTS_INFO(stg_IND_OLDGEN_PERM_info);
RTS_INFO(stg_CAF_UNENTERED_info);
RTS_INFO(stg_CAF_ENTERED_info);
RTS_INFO(stg_WHITEHOLE_info);
RTS_INFO(stg_BLACKHOLE_info);
RTS_INFO(stg_CAF_BLACKHOLE_info);
#ifdef TICKY_TICKY
Expand Down Expand Up @@ -155,6 +156,7 @@ RTS_ENTRY(stg_IND_OLDGEN_entry);
RTS_ENTRY(stg_IND_OLDGEN_PERM_entry);
RTS_ENTRY(stg_CAF_UNENTERED_entry);
RTS_ENTRY(stg_CAF_ENTERED_entry);
RTS_ENTRY(stg_WHITEHOLE_entry);
RTS_ENTRY(stg_BLACKHOLE_entry);
RTS_ENTRY(stg_CAF_BLACKHOLE_entry);
#ifdef TICKY_TICKY
Expand Down
4 changes: 4 additions & 0 deletions ghc/includes/Storage.h
Expand Up @@ -139,6 +139,7 @@ extern void exitStorage(void);
-------------------------------------------------------------------------- */

extern StgPtr allocate ( nat n );
extern StgPtr allocateLocal ( StgRegTable *reg, nat n );
extern StgPtr allocatePinned ( nat n );
extern lnat allocated_bytes ( void );

Expand Down Expand Up @@ -193,6 +194,9 @@ extern void GarbageCollect(void (*get_roots)(evac_fn),rtsBool force_major_gc);
*/
#if defined(SMP)
extern Mutex sm_mutex;
#endif

#if defined(SMP)
#define ACQUIRE_SM_LOCK ACQUIRE_LOCK(&sm_mutex);
#define RELEASE_SM_LOCK RELEASE_LOCK(&sm_mutex);
#else
Expand Down
30 changes: 30 additions & 0 deletions ghc/rts/Capability.c
Expand Up @@ -22,6 +22,9 @@
#include "OSThreads.h"
#include "Capability.h"
#include "Schedule.h" /* to get at EMPTY_RUN_QUEUE() */
#if defined(SMP)
#include "Hash.h"
#endif

#if !defined(SMP)
Capability MainCapability; /* for non-SMP, we have one global capability */
Expand Down Expand Up @@ -81,6 +84,11 @@ static rtsBool passingCapability = rtsFalse;
* Free capability list.
*/
Capability *free_capabilities;

/*
* Maps OSThreadId to Capability *
*/
HashTable *capability_hash;
#endif

#ifdef SMP
Expand Down Expand Up @@ -133,6 +141,8 @@ initCapabilities( void )
free_capabilities = &capabilities[0];
rts_n_free_capabilities = n;

capability_hash = allocHashTable();

IF_DEBUG(scheduler, sched_belch("allocated %d capabilities", n));
#else
capabilities = &MainCapability;
Expand Down Expand Up @@ -164,6 +174,7 @@ grabCapability( Capability** cap )
*cap = free_capabilities;
free_capabilities = (*cap)->link;
rts_n_free_capabilities--;
insertHashTable(capability_hash, osThreadId(), *cap);
#else
# if defined(RTS_SUPPORTS_THREADS)
ASSERT(rts_n_free_capabilities == 1);
Expand All @@ -176,6 +187,23 @@ grabCapability( Capability** cap )
#endif
}

/* ----------------------------------------------------------------------------
* Function: myCapability(void)
*
* Purpose: Return the capability owned by the current thread.
* Should not be used if the current thread does not
* hold a Capability.
* ------------------------------------------------------------------------- */
Capability *
myCapability (void)
{
#if defined(SMP)
return lookupHashTable(capability_hash, osThreadId());
#else
return &MainCapability;
#endif
}

/* ----------------------------------------------------------------------------
* Function: releaseCapability(Capability*)
*
Expand All @@ -195,6 +223,8 @@ releaseCapability( Capability* cap UNUSED_IF_NOT_SMP )
#if defined(SMP)
cap->link = free_capabilities;
free_capabilities = cap;
ASSERT(myCapability() == cap);
removeHashTable(capability_hash, osThreadId(), NULL);
#endif
// Check to see whether a worker thread can be given
// the go-ahead to return the result of an external call..
Expand Down
4 changes: 4 additions & 0 deletions ghc/rts/Capability.h
Expand Up @@ -38,6 +38,10 @@ extern void releaseCapability( Capability* cap );
//
extern void threadRunnable ( void );

// Return the capability that I own.
//
extern Capability *myCapability (void);

extern void prodWorker ( void );

#ifdef RTS_SUPPORTS_THREADS
Expand Down
7 changes: 6 additions & 1 deletion ghc/rts/GC.c
Expand Up @@ -739,7 +739,12 @@ GarbageCollect ( void (*get_roots)(evac_fn), rtsBool force_major_gc )
if (stp->is_compacted) {
collected += (oldgen_saved_blocks - stp->n_blocks) * BLOCK_SIZE_W;
} else {
collected += stp->n_blocks * BLOCK_SIZE_W;
if (g == 0 && s == 0) {
collected += countNurseryBlocks() * BLOCK_SIZE_W;
collected += alloc_blocks;
} else {
collected += stp->n_blocks * BLOCK_SIZE_W;
}
}

/* free old memory and shift to-space into from-space for all
Expand Down
3 changes: 2 additions & 1 deletion ghc/rts/Makefile
Expand Up @@ -319,7 +319,8 @@ SRC_HC_OPTS += \
-\#include LdvProfile.h \
-\#include Profiling.h \
-\#include OSThreads.h \
-\#include Apply.h
-\#include Apply.h \
-\#include SMP.h

ifeq "$(Windows)" "YES"
PrimOps_HC_OPTS += -\#include '<windows.h>' -\#include win32/AsyncIO.h
Expand Down

0 comments on commit bf82198

Please sign in to comment.