diff --git a/ghc/includes/Cmm.h b/ghc/includes/Cmm.h index e989a00e6219..415dc4c056c0 100644 --- a/ghc/includes/Cmm.h +++ b/ghc/includes/Cmm.h @@ -317,8 +317,11 @@ HP_CHK_GEN(alloc,liveness,reentry); \ TICK_ALLOC_HEAP_NOCTR(alloc); +// allocateLocal() allocates from the nursery, so we check to see +// whether the nursery is nearly empty in any function that uses +// allocateLocal() - this includes many of the primops. #define MAYBE_GC(liveness,reentry) \ - if (CInt[alloc_blocks] >= CInt[alloc_blocks_lim]) { \ + if (bdescr_link(CurrentNursery) == NULL) { \ R9 = liveness; \ R10 = reentry; \ jump stg_gc_gen_hp; \ diff --git a/ghc/includes/Regs.h b/ghc/includes/Regs.h index 02032384127c..5374972b526e 100644 --- a/ghc/includes/Regs.h +++ b/ghc/includes/Regs.h @@ -87,7 +87,8 @@ typedef struct StgRegTable_ { StgPtr rHpLim; struct StgTSO_ *rCurrentTSO; struct step_ *rNursery; - struct bdescr_ *rCurrentNursery; + struct bdescr_ *rCurrentNursery; /* Hp/HpLim point into this block */ + struct bdescr_ *rCurrentAlloc; /* for allocation using allocate() */ StgWord rHpAlloc; /* number of *bytes* being allocated in heap */ #if defined(SMP) || defined(PAR) StgSparkPool rSparks; /* per-task spark pool */ diff --git a/ghc/includes/Rts.h b/ghc/includes/Rts.h index fb2a70bc229f..27331bee8535 100644 --- a/ghc/includes/Rts.h +++ b/ghc/includes/Rts.h @@ -110,13 +110,8 @@ extern void _assertFail (char *, unsigned int); #include "Parallel.h" /* STG/Optimised-C related stuff */ -#include "SMP.h" #include "Block.h" -#ifdef SMP -#include -#endif - /* GNU mp library */ #include "gmp.h" diff --git a/ghc/includes/SMP.h b/ghc/includes/SMP.h index e35b95b33794..86930f9b4ec0 100644 --- a/ghc/includes/SMP.h +++ b/ghc/includes/SMP.h @@ -1,6 +1,6 @@ /* ---------------------------------------------------------------------------- * - * (c) The GHC Team, 1999 + * (c) The GHC Team, 2005 * * Macros for SMP support * @@ -23,60 +23,37 @@ #error Build options incompatible with SMP. #endif -/* - * CMPXCHG - this instruction is the standard "test & set". We use it - * for locking closures in the thunk and blackhole entry code. If the - * closure is already locked, or has an unexpected info pointer - * (because another thread is altering it in parallel), we just jump - * to the new entry point. - */ -#if defined(i386_HOST_ARCH) && defined(TABLES_NEXT_TO_CODE) -#define CMPXCHG(p, cmp, new) \ - __asm__ __volatile__ ( \ - "lock ; cmpxchg %1, %0\n" \ - "\tje 1f\n" \ - "\tjmp *%%eax\n" \ - "\t1:\n" \ - : /* no outputs */ \ - : "m" (p), "r" (new), "r" (cmp) \ - ) - /* * XCHG - the atomic exchange instruction. Used for locking closures * during updates (see LOCK_CLOSURE below) and the MVar primops. */ -#define XCHG(reg, obj) \ - __asm__ __volatile__ ( \ - "xchgl %1,%0" \ - :"+r" (reg), "+m" (obj) \ - : /* no input-only operands */ \ - ) - +INLINE_HEADER StgWord +xchg(StgPtr p, StgWord w) +{ + StgWord result; + result = w; + __asm__ __volatile__ ( + "xchgl %1,%0" + :"+r" (result), "+m" (*p) + : /* no input-only operands */ + ); + return result; +} + +INLINE_HEADER StgInfoTable * +lockClosure(StgClosure *p) +{ + StgWord info; +#if 0 + do { + info = xchg((P_)&p->header.info, (W_)&stg_WHITEHOLE_info); + if (info != (W_)&stg_WHITEHOLE_info) return (StgInfoTable *)info; + yieldThread(); + } while (1); #else -#error SMP macros not defined for this architecture + info = p->header.info; #endif - -/* - * LOCK_CLOSURE locks the specified closure, busy waiting for any - * existing locks to be cleared. - */ -#define LOCK_CLOSURE(c) \ - ({ \ - const StgInfoTable *__info; \ - __info = &stg_WHITEHOLE_info; \ - do { \ - XCHG(__info,((StgClosure *)(c))->header.info); \ - } while (__info == &stg_WHITEHOLE_info); \ - __info; \ - }) - -#define LOCK_THUNK(__info) \ - CMPXCHG(R1.cl->header.info, __info, &stg_WHITEHOLE_info); - -#else /* !SMP */ - -#define LOCK_CLOSURE(c) /* nothing */ -#define LOCK_THUNK(__info) /* nothing */ +} #endif /* SMP */ diff --git a/ghc/includes/StgMiscClosures.h b/ghc/includes/StgMiscClosures.h index f8332aad2e04..026c2cf260d3 100644 --- a/ghc/includes/StgMiscClosures.h +++ b/ghc/includes/StgMiscClosures.h @@ -95,6 +95,7 @@ RTS_INFO(stg_IND_OLDGEN_info); RTS_INFO(stg_IND_OLDGEN_PERM_info); RTS_INFO(stg_CAF_UNENTERED_info); RTS_INFO(stg_CAF_ENTERED_info); +RTS_INFO(stg_WHITEHOLE_info); RTS_INFO(stg_BLACKHOLE_info); RTS_INFO(stg_CAF_BLACKHOLE_info); #ifdef TICKY_TICKY @@ -155,6 +156,7 @@ RTS_ENTRY(stg_IND_OLDGEN_entry); RTS_ENTRY(stg_IND_OLDGEN_PERM_entry); RTS_ENTRY(stg_CAF_UNENTERED_entry); RTS_ENTRY(stg_CAF_ENTERED_entry); +RTS_ENTRY(stg_WHITEHOLE_entry); RTS_ENTRY(stg_BLACKHOLE_entry); RTS_ENTRY(stg_CAF_BLACKHOLE_entry); #ifdef TICKY_TICKY diff --git a/ghc/includes/Storage.h b/ghc/includes/Storage.h index a8a6d2414870..0ef5785a85f0 100644 --- a/ghc/includes/Storage.h +++ b/ghc/includes/Storage.h @@ -139,6 +139,7 @@ extern void exitStorage(void); -------------------------------------------------------------------------- */ extern StgPtr allocate ( nat n ); +extern StgPtr allocateLocal ( StgRegTable *reg, nat n ); extern StgPtr allocatePinned ( nat n ); extern lnat allocated_bytes ( void ); @@ -193,6 +194,9 @@ extern void GarbageCollect(void (*get_roots)(evac_fn),rtsBool force_major_gc); */ #if defined(SMP) extern Mutex sm_mutex; +#endif + +#if defined(SMP) #define ACQUIRE_SM_LOCK ACQUIRE_LOCK(&sm_mutex); #define RELEASE_SM_LOCK RELEASE_LOCK(&sm_mutex); #else diff --git a/ghc/rts/Capability.c b/ghc/rts/Capability.c index 1e2d3d657d8b..0ae4688e984c 100644 --- a/ghc/rts/Capability.c +++ b/ghc/rts/Capability.c @@ -22,6 +22,9 @@ #include "OSThreads.h" #include "Capability.h" #include "Schedule.h" /* to get at EMPTY_RUN_QUEUE() */ +#if defined(SMP) +#include "Hash.h" +#endif #if !defined(SMP) Capability MainCapability; /* for non-SMP, we have one global capability */ @@ -81,6 +84,11 @@ static rtsBool passingCapability = rtsFalse; * Free capability list. */ Capability *free_capabilities; + +/* + * Maps OSThreadId to Capability * + */ +HashTable *capability_hash; #endif #ifdef SMP @@ -133,6 +141,8 @@ initCapabilities( void ) free_capabilities = &capabilities[0]; rts_n_free_capabilities = n; + capability_hash = allocHashTable(); + IF_DEBUG(scheduler, sched_belch("allocated %d capabilities", n)); #else capabilities = &MainCapability; @@ -164,6 +174,7 @@ grabCapability( Capability** cap ) *cap = free_capabilities; free_capabilities = (*cap)->link; rts_n_free_capabilities--; + insertHashTable(capability_hash, osThreadId(), *cap); #else # if defined(RTS_SUPPORTS_THREADS) ASSERT(rts_n_free_capabilities == 1); @@ -176,6 +187,23 @@ grabCapability( Capability** cap ) #endif } +/* ---------------------------------------------------------------------------- + * Function: myCapability(void) + * + * Purpose: Return the capability owned by the current thread. + * Should not be used if the current thread does not + * hold a Capability. + * ------------------------------------------------------------------------- */ +Capability * +myCapability (void) +{ +#if defined(SMP) + return lookupHashTable(capability_hash, osThreadId()); +#else + return &MainCapability; +#endif +} + /* ---------------------------------------------------------------------------- * Function: releaseCapability(Capability*) * @@ -195,6 +223,8 @@ releaseCapability( Capability* cap UNUSED_IF_NOT_SMP ) #if defined(SMP) cap->link = free_capabilities; free_capabilities = cap; + ASSERT(myCapability() == cap); + removeHashTable(capability_hash, osThreadId(), NULL); #endif // Check to see whether a worker thread can be given // the go-ahead to return the result of an external call.. diff --git a/ghc/rts/Capability.h b/ghc/rts/Capability.h index 21d4ce4c1e02..f1615dc4e235 100644 --- a/ghc/rts/Capability.h +++ b/ghc/rts/Capability.h @@ -38,6 +38,10 @@ extern void releaseCapability( Capability* cap ); // extern void threadRunnable ( void ); +// Return the capability that I own. +// +extern Capability *myCapability (void); + extern void prodWorker ( void ); #ifdef RTS_SUPPORTS_THREADS diff --git a/ghc/rts/GC.c b/ghc/rts/GC.c index db05ef5f0724..fce011a9dc05 100644 --- a/ghc/rts/GC.c +++ b/ghc/rts/GC.c @@ -739,7 +739,12 @@ GarbageCollect ( void (*get_roots)(evac_fn), rtsBool force_major_gc ) if (stp->is_compacted) { collected += (oldgen_saved_blocks - stp->n_blocks) * BLOCK_SIZE_W; } else { - collected += stp->n_blocks * BLOCK_SIZE_W; + if (g == 0 && s == 0) { + collected += countNurseryBlocks() * BLOCK_SIZE_W; + collected += alloc_blocks; + } else { + collected += stp->n_blocks * BLOCK_SIZE_W; + } } /* free old memory and shift to-space into from-space for all diff --git a/ghc/rts/Makefile b/ghc/rts/Makefile index b5648490506d..802ce6120afb 100644 --- a/ghc/rts/Makefile +++ b/ghc/rts/Makefile @@ -319,7 +319,8 @@ SRC_HC_OPTS += \ -\#include LdvProfile.h \ -\#include Profiling.h \ -\#include OSThreads.h \ - -\#include Apply.h + -\#include Apply.h \ + -\#include SMP.h ifeq "$(Windows)" "YES" PrimOps_HC_OPTS += -\#include '' -\#include win32/AsyncIO.h diff --git a/ghc/rts/PrimOps.cmm b/ghc/rts/PrimOps.cmm index ff1b44222457..cdca63457b7f 100644 --- a/ghc/rts/PrimOps.cmm +++ b/ghc/rts/PrimOps.cmm @@ -49,7 +49,7 @@ newByteArrayzh_fast n = R1; payload_words = ROUNDUP_BYTES_TO_WDS(n); words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words; - "ptr" p = foreign "C" allocate(words); + "ptr" p = foreign "C" allocateLocal(BaseReg "ptr",words); TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0); SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]); StgArrWords_words(p) = payload_words; @@ -97,7 +97,7 @@ newArrayzh_fast MAYBE_GC(R2_PTR,newArrayzh_fast); words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + n; - "ptr" arr = foreign "C" allocate(words); + "ptr" arr = foreign "C" allocateLocal(BaseReg "ptr",words); TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(n), 0); SET_HDR(arr, stg_MUT_ARR_PTRS_info, W_[CCCS]); @@ -1429,14 +1429,14 @@ takeMVarzh_fast { W_ mvar, val, info, tso; -#if defined(SMP) - foreign "C" ACQUIRE_LOCK(sm_mutex "ptr"); -#endif - /* args: R1 = MVar closure */ mvar = R1; +#if defined(SMP) + "ptr" info = foreign "C" lockClosure(mvar "ptr"); +#else info = GET_INFO(mvar); +#endif /* If the MVar is empty, put ourselves on its blocking queue, * and wait until we're woken up. @@ -1453,7 +1453,7 @@ takeMVarzh_fast StgMVar_tail(mvar) = CurrentTSO; #if defined(SMP) - foreign "C" RELEASE_LOCK(sm_mutex "ptr"); + SET_INFO(mvar,stg_EMPTY_MVAR_info); #endif jump stg_block_takemvar; @@ -1486,7 +1486,7 @@ takeMVarzh_fast } #if defined(SMP) - foreign "C" RELEASE_LOCK(sm_mutex "ptr"); + SET_INFO(mvar,stg_FULL_MVAR_info); #endif RET_P(val); @@ -1494,16 +1494,10 @@ takeMVarzh_fast else { /* No further putMVars, MVar is now empty */ - - /* do this last... we might have locked the MVar in the SMP case, - * and writing the info pointer will unlock it. - */ - SET_INFO(mvar,stg_EMPTY_MVAR_info); StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure; - -#if defined(SMP) - foreign "C" RELEASE_LOCK(sm_mutex "ptr"); -#endif + + /* unlocks the closure in the SMP case */ + SET_INFO(mvar,stg_EMPTY_MVAR_info); RET_P(val); } @@ -1514,23 +1508,23 @@ tryTakeMVarzh_fast { W_ mvar, val, info, tso; -#if defined(SMP) - foreign "C" ACQUIRE_LOCK(sm_mutex "ptr"); -#endif - /* args: R1 = MVar closure */ mvar = R1; +#if defined(SMP) + "ptr" info = foreign "C" lockClosure(mvar "ptr"); +#else info = GET_INFO(mvar); +#endif if (info == stg_EMPTY_MVAR_info) { +#if defined(SMP) + SET_INFO(mvar,stg_EMPTY_MVAR_info); +#endif /* HACK: we need a pointer to pass back, * so we abuse NO_FINALIZER_closure */ -#if defined(SMP) - foreign "C" RELEASE_LOCK(sm_mutex "ptr"); -#endif RET_NP(0, stg_NO_FINALIZER_closure); } @@ -1559,6 +1553,9 @@ tryTakeMVarzh_fast if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) { StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure; } +#if defined(SMP) + SET_INFO(mvar,stg_FULL_MVAR_info); +#endif } else { @@ -1567,10 +1564,6 @@ tryTakeMVarzh_fast SET_INFO(mvar,stg_EMPTY_MVAR_info); } -#if defined(SMP) - foreign "C" RELEASE_LOCK(sm_mutex "ptr"); -#endif - RET_NP(1, val); } @@ -1579,14 +1572,14 @@ putMVarzh_fast { W_ mvar, info, tso; -#if defined(SMP) - foreign "C" ACQUIRE_LOCK(sm_mutex "ptr"); -#endif - /* args: R1 = MVar, R2 = value */ mvar = R1; +#if defined(SMP) + "ptr" info = foreign "C" lockClosure(mvar "ptr"); +#else info = GET_INFO(mvar); +#endif if (info == stg_FULL_MVAR_info) { if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) { @@ -1600,7 +1593,7 @@ putMVarzh_fast StgMVar_tail(mvar) = CurrentTSO; #if defined(SMP) - foreign "C" RELEASE_LOCK(sm_mutex "ptr"); + SET_INFO(mvar,stg_FULL_MVAR_info); #endif jump stg_block_putmvar; } @@ -1628,7 +1621,7 @@ putMVarzh_fast } #if defined(SMP) - foreign "C" RELEASE_LOCK(sm_mutex "ptr"); + SET_INFO(mvar,stg_EMPTY_MVAR_info); #endif jump %ENTRY_CODE(Sp(0)); } @@ -1639,9 +1632,6 @@ putMVarzh_fast /* unlocks the MVar in the SMP case */ SET_INFO(mvar,stg_FULL_MVAR_info); -#if defined(SMP) - foreign "C" RELEASE_LOCK(sm_mutex "ptr"); -#endif jump %ENTRY_CODE(Sp(0)); } @@ -1653,18 +1643,18 @@ tryPutMVarzh_fast { W_ mvar, info, tso; -#if defined(SMP) - foreign "C" ACQUIRE_LOCK(sm_mutex "ptr"); -#endif - /* args: R1 = MVar, R2 = value */ mvar = R1; +#if defined(SMP) + "ptr" info = foreign "C" lockClosure(mvar "ptr"); +#else info = GET_INFO(mvar); +#endif if (info == stg_FULL_MVAR_info) { #if defined(SMP) - foreign "C" RELEASE_LOCK(sm_mutex "ptr"); + SET_INFO(mvar,stg_FULL_MVAR_info); #endif RET_N(0); } @@ -1692,7 +1682,7 @@ tryPutMVarzh_fast } #if defined(SMP) - foreign "C" RELEASE_LOCK(sm_mutex "ptr"); + SET_INFO(mvar,stg_EMPTY_MVAR_info); #endif jump %ENTRY_CODE(Sp(0)); } @@ -1702,9 +1692,7 @@ tryPutMVarzh_fast StgMVar_value(mvar) = R2; /* unlocks the MVar in the SMP case */ SET_INFO(mvar,stg_FULL_MVAR_info); -#if defined(SMP) - foreign "C" RELEASE_LOCK(sm_mutex "ptr"); -#endif + jump %ENTRY_CODE(Sp(0)); } diff --git a/ghc/rts/StgMiscClosures.cmm b/ghc/rts/StgMiscClosures.cmm index 4e2c0fbe46c7..15f27d6bcc94 100644 --- a/ghc/rts/StgMiscClosures.cmm +++ b/ghc/rts/StgMiscClosures.cmm @@ -418,6 +418,12 @@ INFO_TABLE(stg_SE_CAF_BLACKHOLE,0,1,SE_CAF_BLACKHOLE,"SE_CAF_BLACKHOLE","SE_CAF_ { foreign "C" barf("SE_CAF_BLACKHOLE object entered!"); } #endif +/* ---------------------------------------------------------------------------- + ------------------------------------------------------------------------- */ + +INFO_TABLE(stg_WHITEHOLE, 0,0, INVALID_OBJECT, "WHITEHOLE", "WHITEHOLE") +{ foreign "C" barf("WHITEHOLE object entered!"); } + /* ---------------------------------------------------------------------------- Some static info tables for things that don't get entered, and therefore don't need entry code (i.e. boxed but unpointed objects) diff --git a/ghc/rts/Storage.c b/ghc/rts/Storage.c index f466a58ac9eb..7e07ff203abf 100644 --- a/ghc/rts/Storage.c +++ b/ghc/rts/Storage.c @@ -26,6 +26,9 @@ #include #include +/* + * All these globals require sm_mutex to access in SMP mode. + */ StgClosure *caf_list = NULL; StgClosure *revertible_caf_list = NULL; rtsBool keepCAFs; @@ -405,10 +408,12 @@ assignNurseriesToCapabilities (void) for (i = 0; i < n_nurseries; i++) { capabilities[i].r.rNursery = &nurseries[i]; capabilities[i].r.rCurrentNursery = nurseries[i].blocks; + capabilities[i].r.rCurrentAlloc = NULL; } #else /* SMP */ MainCapability.r.rNursery = &nurseries[0]; MainCapability.r.rCurrentNursery = nurseries[0].blocks; + MainCapability.r.rCurrentAlloc = NULL; #endif } @@ -534,49 +539,49 @@ resizeNurseries (nat blocks) StgPtr allocate( nat n ) { - bdescr *bd; - StgPtr p; + bdescr *bd; + StgPtr p; - ACQUIRE_SM_LOCK; + ACQUIRE_SM_LOCK; - TICK_ALLOC_HEAP_NOCTR(n); - CCS_ALLOC(CCCS,n); - - /* big allocation (>LARGE_OBJECT_THRESHOLD) */ - /* ToDo: allocate directly into generation 1 */ - if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) { - nat req_blocks = (lnat)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE; - bd = allocGroup(req_blocks); - dbl_link_onto(bd, &g0s0->large_objects); - g0s0->n_large_blocks += req_blocks; - bd->gen_no = 0; - bd->step = g0s0; - bd->flags = BF_LARGE; - bd->free = bd->start + n; - alloc_blocks += req_blocks; - RELEASE_SM_LOCK; - return bd->start; + TICK_ALLOC_HEAP_NOCTR(n); + CCS_ALLOC(CCCS,n); - /* small allocation ( alloc_HpLim) { - if (small_alloc_list) { - small_alloc_list->free = alloc_Hp; + /* big allocation (>LARGE_OBJECT_THRESHOLD) */ + /* ToDo: allocate directly into generation 1 */ + if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) { + nat req_blocks = (lnat)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE; + bd = allocGroup(req_blocks); + dbl_link_onto(bd, &g0s0->large_objects); + g0s0->n_large_blocks += req_blocks; + bd->gen_no = 0; + bd->step = g0s0; + bd->flags = BF_LARGE; + bd->free = bd->start + n; + alloc_blocks += req_blocks; + RELEASE_SM_LOCK; + return bd->start; + + /* small allocation ( alloc_HpLim) { + if (small_alloc_list) { + small_alloc_list->free = alloc_Hp; + } + bd = allocBlock(); + bd->link = small_alloc_list; + small_alloc_list = bd; + bd->gen_no = 0; + bd->step = g0s0; + bd->flags = 0; + alloc_Hp = bd->start; + alloc_HpLim = bd->start + BLOCK_SIZE_W; + alloc_blocks++; } - bd = allocBlock(); - bd->link = small_alloc_list; - small_alloc_list = bd; - bd->gen_no = 0; - bd->step = g0s0; - bd->flags = 0; - alloc_Hp = bd->start; - alloc_HpLim = bd->start + BLOCK_SIZE_W; - alloc_blocks++; - } - - p = alloc_Hp; - alloc_Hp += n; - RELEASE_SM_LOCK; - return p; + + p = alloc_Hp; + alloc_Hp += n; + RELEASE_SM_LOCK; + return p; } lnat @@ -603,6 +608,82 @@ tidyAllocateLists (void) } } +/* ----------------------------------------------------------------------------- + allocateLocal() + + This allocates memory in the current thread - it is intended for + use primarily from STG-land where we have a Capability. It is + better than allocate() because it doesn't require taking the + sm_mutex lock in the common case. + + Memory is allocated directly from the nursery if possible (but not + from the current nursery block, so as not to interfere with + Hp/HpLim). + -------------------------------------------------------------------------- */ + +StgPtr +allocateLocal( StgRegTable *reg, nat n ) +{ + bdescr *bd; + StgPtr p; + + TICK_ALLOC_HEAP_NOCTR(n); + CCS_ALLOC(CCCS,n); + + /* big allocation (>LARGE_OBJECT_THRESHOLD) */ + /* ToDo: allocate directly into generation 1 */ + if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) { + nat req_blocks = (lnat)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE; + ACQUIRE_SM_LOCK; + bd = allocGroup(req_blocks); + dbl_link_onto(bd, &g0s0->large_objects); + g0s0->n_large_blocks += req_blocks; + bd->gen_no = 0; + bd->step = g0s0; + bd->flags = BF_LARGE; + bd->free = bd->start + n; + alloc_blocks += req_blocks; + RELEASE_SM_LOCK; + return bd->start; + + /* small allocation (rCurrentAlloc; + if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) { + + // The CurrentAlloc block is full, we need to find another + // one. First, we try taking the next block from the + // nursery: + bd = reg->rCurrentNursery->link; + + if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) { + // The nursery is empty, or the next block is already + // full: allocate a fresh block (we can't fail here). + ACQUIRE_SM_LOCK; + bd = allocBlock(); + alloc_blocks++; + RELEASE_SM_LOCK; + bd->gen_no = 0; + bd->step = g0s0; + bd->flags = 0; + } else { + // we have a block in the nursery: take it and put + // it at the *front* of the nursery list, and use it + // to allocate() from. + reg->rCurrentNursery->link = bd->link; + } + bd->link = reg->rNursery->blocks; + reg->rNursery->blocks = bd; + bd->u.back = NULL; + reg->rCurrentAlloc = bd; + } + } + p = bd->free; + bd->free += n; + return p; +} + /* --------------------------------------------------------------------------- Allocate a fixed/pinned object. @@ -690,7 +771,11 @@ stgAllocForGMP (size_t size_in_bytes) total_size_in_words = sizeofW(StgArrWords) + data_size_in_words; /* allocate and fill it in. */ - arr = (StgArrWords *)allocate(total_size_in_words); +#if defined(SMP) + arr = (StgArrWords *)allocateLocal(&(myCapability()->r), total_size_in_words); +#else + arr = (StgArrWords *)allocateLocal(&MainCapability.r, total_size_in_words); +#endif SET_ARR_HDR(arr, &stg_ARR_WORDS_info, CCCS, data_size_in_words); /* and return a ptr to the goods inside the array */ @@ -740,9 +825,7 @@ calcAllocated( void ) nat i; allocated = allocated_bytes(); - for (i = 0; i < n_nurseries; i++) { - allocated += nurseries[i].n_blocks * BLOCK_SIZE_W; - } + allocated += countNurseryBlocks() * BLOCK_SIZE_W; #ifdef SMP for (i = 0; i < n_nurseries; i++) {