Skip to content

Commit

Permalink
Improve Parallelization of Remembered Set Scanning in Gencon
Browse files Browse the repository at this point in the history
Improve the parallelization of remembered set scanning
in the final stop-the-world phase of the concurrent global
GC cycle in Gencon. Remove the old solution that used the command
line option -XXgc:dirtCardDuringRSScan.

Signed-off-by: Jonathan Oommen <jon.oommen@gmail.com>
  • Loading branch information
jonoommen committed Feb 4, 2021
1 parent f38780c commit b429468
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 35 deletions.
4 changes: 1 addition & 3 deletions gc/base/GCExtensionsBase.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright (c) 1991, 2020 IBM Corp. and others
* Copyright (c) 1991, 2021 IBM Corp. and others
*
* This program and the accompanying materials are made available under
* the terms of the Eclipse Public License 2.0 which accompanies this
Expand Down Expand Up @@ -559,7 +559,6 @@ class MM_GCExtensionsBase : public MM_BaseVirtual {
double concurrentSlackFragmentationAdjustmentWeight; /**< weight(from 0.0 to 5.0) used for calculating free tenure space (how much percentage of the fragmentation need to remove from freeBytes) */
bool debugConcurrentMark;
bool optimizeConcurrentWB;
bool dirtCardDuringRSScan;
uintptr_t concurrentLevel;
uintptr_t concurrentBackground;
uintptr_t concurrentSlack; /**< number of bytes to add to the concurrent kickoff threshold buffer */
Expand Down Expand Up @@ -1635,7 +1634,6 @@ class MM_GCExtensionsBase : public MM_BaseVirtual {
, concurrentSlackFragmentationAdjustmentWeight(0.0)
, debugConcurrentMark(false)
, optimizeConcurrentWB(true)
, dirtCardDuringRSScan(false)
, concurrentLevel(8)
, concurrentBackground(1)
, concurrentSlack(0)
Expand Down
56 changes: 24 additions & 32 deletions gc/base/standard/ConcurrentGC.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright (c) 1991, 2020 IBM Corp. and others
* Copyright (c) 1991, 2021 IBM Corp. and others
*
* This program and the accompanying materials are made available under
* the terms of the Eclipse Public License 2.0 which accompanies this
Expand Down Expand Up @@ -3499,15 +3499,13 @@ MM_ConcurrentGC::finalCleanCards(MM_EnvironmentBase *env)
void
MM_ConcurrentGC::scanRememberedSet(MM_EnvironmentBase *env)
{
OMRPORT_ACCESS_FROM_ENVIRONMENT(env);
MM_SublistPuddle *puddle;
omrobjectptr_t *slotPtr, objectPtr;
uintptr_t RSObjects = 0;
uintptr_t bytesTraced = 0;
uintptr_t maxPushes = _markingScheme->getWorkPackets()->getSlotsInPacket() / 2;
/* Get a fresh work stack */
env->_workStack.reset(env, _markingScheme->getWorkPackets());
env->_workStack.clearPushCount();
env->_markStats.clear();

GC_SublistIterator rememberedSetIterator(&_extensions->rememberedSet);
while((puddle = rememberedSetIterator.nextList()) != NULL) {
Expand All @@ -3523,50 +3521,44 @@ MM_ConcurrentGC::scanRememberedSet(MM_EnvironmentBase *env)
&& (objectPtr < _heapAlloc)
&& _markingScheme->isMarkedOutline(objectPtr)
&& !_cardTable->isObjectInDirtyCardNoCheck(env,objectPtr)) {
RSObjects += 1;
if (_extensions->dirtCardDuringRSScan) {
_cardTable->dirtyCard(env, objectPtr);
} else {
/* VMDESIGN 2048 -- due to barrier elision optimizations, the JIT may not have dirtied
* cards for some objects in the remembered set. Therefore we may discover references
* to both nursery and tenure objects while scanning remembered objects.
/* VMDESIGN 2048 -- due to barrier elision optimizations, the JIT may not have dirtied
* cards for some objects in the remembered set. Therefore we may discover references
* to both nursery and tenure objects while scanning remembered objects.
*/
_markingScheme->scanObject(env,objectPtr, SCAN_REASON_REMEMBERED_SET_SCAN);

/* Have we pushed enough new references? */
if(env->_workStack.getPushCount() >= maxPushes) {
/* To reduce the chances of mark stack overflow, we do some marking
* of what we have just pushed.
*
* WARNING. If we HALTED concurrent then we will process any remaining
* workpackets at this point. This will make RS processing appear more
* expensive than it really is.
*/

bytesTraced += _markingScheme->scanObject(env,objectPtr, SCAN_REASON_REMEMBERED_SET_SCAN);

/* Have we pushed enough new references? */
if(env->_workStack.getPushCount() >= maxPushes) {
/* To reduce the chances of mark stack overflow, we do some marking
* of what we have just pushed.
*
* WARNING. If we HALTED concurrent then we will process any remaining
* workpackets at this point. This will make RS processing appear more
* expensive than it really is.
*/
while(NULL != (objectPtr = (omrobjectptr_t)env->_workStack.popNoWait(env))) {
bytesTraced += _markingScheme->scanObject(env, objectPtr, SCAN_REASON_PACKET);
}
env->_workStack.clearPushCount();
while(NULL != (objectPtr = (omrobjectptr_t)env->_workStack.popNoWait(env))) {
_markingScheme->scanObject(env, objectPtr, SCAN_REASON_PACKET);
}
env->_workStack.clearPushCount();
}
}
}
}
}

env->_workStack.clearPushCount();
/* sort of abusing addToWorkStallTime to record the point when thread is finished with RS Scan Work
* Since popNoWait is used, we never stalled before this point. All stall time will be from this point till RS scan end event */
env->_workPacketStats.addToWorkStallTime(0, omrtime_hires_clock());

/* Call completeScan to allow for improved RS scanning parallelism */
_markingScheme->completeScan(env);

/* Flush this thread's reference object buffer and work stack, returning any packets to the appropriate lists */
flushLocalBuffers(env);

/* Add RS objects found to global count */
_stats.incRSObjectsFound(RSObjects);
_stats.incRSObjectsFound(env->_markStats._objectsScanned);

/* ..and amount traced */
_stats.incRSScanTraceCount(bytesTraced);
_stats.incRSScanTraceCount(env->_markStats._bytesScanned);
}

/**
Expand Down

0 comments on commit b429468

Please sign in to comment.