From c10c1ff8e3237689212606c9aa5153beec8a1778 Mon Sep 17 00:00:00 2001 From: Sean Gillespie Date: Fri, 6 Jan 2017 16:21:11 -0800 Subject: [PATCH] [Local GC] Move Software Write Watch's write barrier updates to GCToEEInterface::StompWriteBarrier (#8605) * Move Software Write Watch's write barrier updates to use the new GCToEEInterface::StompWriteBarrier to stomp the EE's write barrier. * Address code review feedback, move SetCardsAfterBulkCopy to EE side of the interface --- src/classlibnative/bcltype/arraynative.cpp | 2 +- src/gc/gc.cpp | 136 +++++---------------- src/gc/gc.h | 2 - src/gc/gccommon.cpp | 2 - src/gc/gcimpl.h | 1 - src/gc/gcinterface.h | 19 ++- src/gc/gcpriv.h | 4 +- src/gc/gcsvr.cpp | 1 + src/gc/gcwks.cpp | 1 + src/gc/sample/GCSample.cpp | 13 +- src/gc/softwarewritewatch.cpp | 11 +- src/gc/softwarewritewatch.h | 40 +++--- src/vm/amd64/jitinterfaceamd64.cpp | 5 +- src/vm/gcenv.ee.cpp | 43 ++++++- src/vm/gcenv.h | 2 - src/vm/gcheaputilities.cpp | 9 +- src/vm/gcheaputilities.h | 103 ++++++++++++++-- src/vm/gchelpers.cpp | 99 +++++++++++++-- src/vm/gchelpers.h | 4 +- 19 files changed, 309 insertions(+), 188 deletions(-) diff --git a/src/classlibnative/bcltype/arraynative.cpp b/src/classlibnative/bcltype/arraynative.cpp index 58fa4dd3e6e7..9baba44c1eea 100644 --- a/src/classlibnative/bcltype/arraynative.cpp +++ b/src/classlibnative/bcltype/arraynative.cpp @@ -961,7 +961,7 @@ void memmoveGCRefs(void *dest, const void *src, size_t len) } } - GCHeapUtilities::GetGCHeap()->SetCardsAfterBulkCopy((Object**)dest, len); + SetCardsAfterBulkCopy((Object**)dest, len); } void ArrayNative::ArrayCopyNoTypeCheck(BASEARRAYREF pSrc, unsigned int srcIndex, BASEARRAYREF pDest, unsigned int destIndex, unsigned int length) diff --git a/src/gc/gc.cpp b/src/gc/gc.cpp index 586da235274f..2dad2ca83d2f 100644 --- a/src/gc/gc.cpp +++ b/src/gc/gc.cpp @@ -1402,9 +1402,6 @@ int mark_time, plan_time, sweep_time, reloc_time, compact_time; #ifndef MULTIPLE_HEAPS -#define ephemeral_low 
g_gc_ephemeral_low -#define ephemeral_high g_gc_ephemeral_high - #endif // MULTIPLE_HEAPS #ifdef TRACE_GC @@ -2187,27 +2184,22 @@ void stomp_write_barrier_resize(bool is_runtime_suspended, bool requires_upper_b args.card_table = g_gc_card_table; args.lowest_address = g_gc_lowest_address; args.highest_address = g_gc_highest_address; +#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + if (SoftwareWriteWatch::IsEnabledForGCHeap()) + { + args.write_watch_table = g_gc_sw_ww_table; + } +#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP GCToEEInterface::StompWriteBarrier(&args); } -void stomp_write_barrier_ephemeral(bool is_runtime_suspended, uint8_t* ephemeral_lo, uint8_t* ephemeral_hi) +void stomp_write_barrier_ephemeral(uint8_t* ephemeral_low, uint8_t* ephemeral_high) { WriteBarrierParameters args = {}; args.operation = WriteBarrierOp::StompEphemeral; - args.is_runtime_suspended = is_runtime_suspended; - args.ephemeral_lo = g_gc_ephemeral_low; - args.ephemeral_hi = g_gc_ephemeral_high; -#ifdef MULTIPLE_HEAPS - // It is not correct to update the EE's g_ephemeral_low and g_ephemeral_high - // to anything other than their default values when using Server GC, since - // there is no single ephemeral generation across all of the heaps. - // Server GC write barriers do not reference these two globals, but ErectWriteBarrier does. - // - // When MULTIPLE_HEAPS is defined, g_gc_ephemeral_low and g_gc_ephemeral_high should - // always have their default values. 
- assert(args.ephemeral_lo == (uint8_t*)1); - assert(args.ephemeral_hi == (uint8_t*)~0); -#endif // MULTIPLE_HEAPS + args.is_runtime_suspended = true; + args.ephemeral_low = ephemeral_low; + args.ephemeral_high = ephemeral_high; GCToEEInterface::StompWriteBarrier(&args); } @@ -2220,6 +2212,8 @@ void stomp_write_barrier_initialize() args.card_table = g_gc_card_table; args.lowest_address = g_gc_lowest_address; args.highest_address = g_gc_highest_address; + args.ephemeral_low = reinterpret_cast<uint8_t*>(1); + args.ephemeral_high = reinterpret_cast<uint8_t*>(~0); GCToEEInterface::StompWriteBarrier(&args); } @@ -2430,6 +2424,10 @@ BOOL gc_heap::ro_segments_in_range; size_t gc_heap::gen0_big_free_spaces = 0; +uint8_t* gc_heap::ephemeral_low; + +uint8_t* gc_heap::ephemeral_high; + uint8_t* gc_heap::lowest_address; uint8_t* gc_heap::highest_address; @@ -7277,9 +7275,6 @@ int gc_heap::grow_brick_card_tables (uint8_t* start, } g_gc_card_table = translated_ct; - g_gc_lowest_address = saved_g_lowest_address; - g_gc_highest_address = saved_g_highest_address; - SoftwareWriteWatch::SetResizedUntranslatedTable( mem + sw_ww_table_offset, saved_g_lowest_address, @@ -7290,6 +7285,8 @@ int gc_heap::grow_brick_card_tables (uint8_t* start, // grow version of the write barrier. This test tells us if the new // segment was allocated at a lower address than the old, requiring // that we start doing an upper bounds check in the write barrier. 
+ g_gc_lowest_address = saved_g_lowest_address; + g_gc_highest_address = saved_g_highest_address; stomp_write_barrier_resize(true, la != saved_g_lowest_address); write_barrier_updated = true; @@ -9662,7 +9659,7 @@ void gc_heap::make_generation (generation& gen, heap_segment* seg, uint8_t* star #endif //FREE_USAGE_STATS } -void gc_heap::adjust_ephemeral_limits (bool is_runtime_suspended) +void gc_heap::adjust_ephemeral_limits () { ephemeral_low = generation_allocation_start (generation_of (max_generation - 1)); ephemeral_high = heap_segment_reserved (ephemeral_heap_segment); @@ -9670,8 +9667,10 @@ void gc_heap::adjust_ephemeral_limits (bool is_runtime_suspended) dprintf (3, ("new ephemeral low: %Ix new ephemeral high: %Ix", (size_t)ephemeral_low, (size_t)ephemeral_high)) +#ifndef MULTIPLE_HEAPS // This updates the write barrier helpers with the new info. - stomp_write_barrier_ephemeral(is_runtime_suspended, ephemeral_low, ephemeral_high); + stomp_write_barrier_ephemeral(ephemeral_low, ephemeral_high); +#endif // MULTIPLE_HEAPS } #if defined(TRACE_GC) || defined(GC_CONFIG_DRIVEN) @@ -10466,7 +10465,7 @@ gc_heap::init_gc_heap (int h_number) make_background_mark_stack (b_arr); #endif //BACKGROUND_GC - adjust_ephemeral_limits(true); + adjust_ephemeral_limits(); #ifdef MARK_ARRAY // why would we clear the mark array for this page? it should be cleared.. 
@@ -15364,7 +15363,8 @@ void gc_heap::gc1() if (!settings.concurrent) #endif //BACKGROUND_GC { - adjust_ephemeral_limits(!!IsGCThread()); + assert(!!IsGCThread()); + adjust_ephemeral_limits(); } #ifdef BACKGROUND_GC @@ -16204,7 +16204,8 @@ BOOL gc_heap::expand_soh_with_minimal_gc() dd_gc_new_allocation (dynamic_data_of (max_generation)) -= ephemeral_size; dd_new_allocation (dynamic_data_of (max_generation)) = dd_gc_new_allocation (dynamic_data_of (max_generation)); - adjust_ephemeral_limits(!!IsGCThread()); + assert(!!IsGCThread()); + adjust_ephemeral_limits(); return TRUE; } else @@ -32778,8 +32779,8 @@ gc_heap::verify_heap (BOOL begin_gc_p) #endif //BACKGROUND_GC #ifndef MULTIPLE_HEAPS - if ((g_gc_ephemeral_low != generation_allocation_start (generation_of (max_generation - 1))) || - (g_gc_ephemeral_high != heap_segment_reserved (ephemeral_heap_segment))) + if ((ephemeral_low != generation_allocation_start (generation_of (max_generation - 1))) || + (ephemeral_high != heap_segment_reserved (ephemeral_heap_segment))) { FATAL_GC_ERROR(); } @@ -35681,85 +35682,6 @@ void GCHeap::SetFinalizationRun (Object* obj) #endif // FEATURE_PREMORTEM_FINALIZATION -//---------------------------------------------------------------------------- -// -// Write Barrier Support for bulk copy ("Clone") operations -// -// StartPoint is the target bulk copy start point -// len is the length of the bulk copy (in bytes) -// -// -// Performance Note: -// -// This is implemented somewhat "conservatively", that is we -// assume that all the contents of the bulk copy are object -// references. If they are not, and the value lies in the -// ephemeral range, we will set false positives in the card table. 
-// -// We could use the pointer maps and do this more accurately if necessary - -#if defined(_MSC_VER) && defined(_TARGET_X86_) -#pragma optimize("y", on) // Small critical routines, don't put in EBP frame -#endif //_MSC_VER && _TARGET_X86_ - -void -GCHeap::SetCardsAfterBulkCopy( Object **StartPoint, size_t len ) -{ - Object **rover; - Object **end; - - // Target should aligned - assert(Aligned ((size_t)StartPoint)); - - - // Don't optimize the Generation 0 case if we are checking for write barrier voilations - // since we need to update the shadow heap even in the generation 0 case. -#if defined (WRITE_BARRIER_CHECK) && !defined (SERVER_GC) - if (g_pConfig->GetHeapVerifyLevel() & EEConfig::HEAPVERIFY_BARRIERCHECK) - for(unsigned i=0; i < len / sizeof(Object*); i++) - updateGCShadow(&StartPoint[i], StartPoint[i]); -#endif //WRITE_BARRIER_CHECK && !SERVER_GC - -#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP - if (SoftwareWriteWatch::IsEnabledForGCHeap()) - { - SoftwareWriteWatch::SetDirtyRegion(StartPoint, len); - } -#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP - - // If destination is in Gen 0 don't bother - if ( -#ifdef BACKGROUND_GC - (!gc_heap::settings.concurrent) && -#endif //BACKGROUND_GC - (g_theGCHeap->WhichGeneration( (Object*) StartPoint ) == 0)) - return; - - rover = StartPoint; - end = StartPoint + (len/sizeof(Object*)); - while (rover < end) - { - if ( (((uint8_t*)*rover) >= g_gc_ephemeral_low) && (((uint8_t*)*rover) < g_gc_ephemeral_high) ) - { - // Set Bit For Card and advance to next card - size_t card = gcard_of ((uint8_t*)rover); - - Interlocked::Or (&g_gc_card_table[card/card_word_width], (1U << (card % card_word_width))); - // Skip to next card for the object - rover = (Object**)align_on_card ((uint8_t*)(rover+1)); - } - else - { - rover++; - } - } -} - -#if defined(_MSC_VER) && defined(_TARGET_X86_) -#pragma optimize("", on) // Go back to command line default optimizations -#endif //_MSC_VER && _TARGET_X86_ - - #ifdef 
FEATURE_PREMORTEM_FINALIZATION //-------------------------------------------------------------------- diff --git a/src/gc/gc.h b/src/gc/gc.h index b7f1e956b6e3..6f8626a3d6b9 100644 --- a/src/gc/gc.h +++ b/src/gc/gc.h @@ -140,8 +140,6 @@ class DacHeapWalker; extern "C" uint32_t* g_gc_card_table; extern "C" uint8_t* g_gc_lowest_address; extern "C" uint8_t* g_gc_highest_address; -extern "C" uint8_t* g_gc_ephemeral_low; -extern "C" uint8_t* g_gc_ephemeral_high; namespace WKS { ::IGCHeapInternal* CreateGCHeap(); diff --git a/src/gc/gccommon.cpp b/src/gc/gccommon.cpp index d1ccddd205fc..0292705a1682 100644 --- a/src/gc/gccommon.cpp +++ b/src/gc/gccommon.cpp @@ -41,8 +41,6 @@ uint8_t* g_shadow_lowest_address = NULL; uint32_t* g_gc_card_table; uint8_t* g_gc_lowest_address = 0; uint8_t* g_gc_highest_address = 0; -uint8_t* g_gc_ephemeral_low = (uint8_t*)1; -uint8_t* g_gc_ephemeral_high = (uint8_t*)~0; VOLATILE(int32_t) m_GCLock = -1; diff --git a/src/gc/gcimpl.h b/src/gc/gcimpl.h index 7e3a13a743d9..cb91c4dc3e08 100644 --- a/src/gc/gcimpl.h +++ b/src/gc/gcimpl.h @@ -198,7 +198,6 @@ class GCHeap : public IGCHeapInternal BOOL FinalizeAppDomain(AppDomain *pDomain, BOOL fRunFinalizers); BOOL ShouldRestartFinalizerWatchDog(); - void SetCardsAfterBulkCopy( Object**, size_t); void DiagWalkObject (Object* obj, walk_fn fn, void* context); public: // FIX diff --git a/src/gc/gcinterface.h b/src/gc/gcinterface.h index 1457848992f8..4ba4e0c637da 100644 --- a/src/gc/gcinterface.h +++ b/src/gc/gcinterface.h @@ -46,7 +46,9 @@ enum class WriteBarrierOp { StompResize, StompEphemeral, - Initialize + Initialize, + SwitchToWriteWatch, + SwitchToNonWriteWatch }; // Arguments to GCToEEInterface::StompWriteBarrier @@ -85,11 +87,15 @@ struct WriteBarrierParameters // The new start of the ephemeral generation. // Used for WriteBarrierOp::StompEphemeral. - uint8_t* ephemeral_lo; + uint8_t* ephemeral_low; // The new end of the ephemeral generation. // Used for WriteBarrierOp::StompEphemeral. 
- uint8_t* ephemeral_hi; + uint8_t* ephemeral_high; + + // The new write watch table, if we are using our own write watch + // implementation. Used for WriteBarrierOp::SwitchToWriteWatch and StompResize. + uint8_t* write_watch_table; }; #include "gcinterface.ee.h" @@ -148,6 +154,10 @@ struct segment_info #define max_generation 2 +// The bit shift used to convert a memory address into an index into the +// Software Write Watch table. +#define SOFTWARE_WRITE_WATCH_AddressToTableByteIndexShift 0xc + class Object; class IGCHeap; @@ -398,9 +408,6 @@ class IGCHeap { // sanity checks asserting that a GC has not occured. virtual unsigned GetGcCount() = 0; - // Sets cards after an object has been memmoved. - virtual void SetCardsAfterBulkCopy(Object** obj, size_t length) = 0; - // Gets whether or not the home heap of this alloc context matches the heap // associated with this thread. virtual bool IsThreadUsingAllocationContextHeap(gc_alloc_context* acontext, int thread_number) = 0; diff --git a/src/gc/gcpriv.h b/src/gc/gcpriv.h index 3bed8c2cf80c..1f97d7f2d506 100644 --- a/src/gc/gcpriv.h +++ b/src/gc/gcpriv.h @@ -1671,7 +1671,7 @@ class gc_heap PER_HEAP void reset_write_watch (BOOL concurrent_p); PER_HEAP - void adjust_ephemeral_limits (bool is_runtime_suspended); + void adjust_ephemeral_limits (); PER_HEAP void make_generation (generation& gen, heap_segment* seg, uint8_t* start, uint8_t* pointer); @@ -2802,13 +2802,11 @@ class gc_heap PER_HEAP void exit_gc_done_event_lock(); -#ifdef MULTIPLE_HEAPS PER_HEAP uint8_t* ephemeral_low; //lowest ephemeral address PER_HEAP uint8_t* ephemeral_high; //highest ephemeral address -#endif //MULTIPLE_HEAPS PER_HEAP uint32_t* card_table; diff --git a/src/gc/gcsvr.cpp b/src/gc/gcsvr.cpp index cf5fc9335f3f..70801dd4ee70 100644 --- a/src/gc/gcsvr.cpp +++ b/src/gc/gcsvr.cpp @@ -13,6 +13,7 @@ #include "gc.h" #include "gcscan.h" #include "gcdesc.h" +#include "softwarewritewatch.h" #define SERVER_GC 1 diff --git a/src/gc/gcwks.cpp b/src/gc/gcwks.cpp index 
574df8215a75..5c489df0e0dd 100644 --- a/src/gc/gcwks.cpp +++ b/src/gc/gcwks.cpp @@ -11,6 +11,7 @@ #include "gc.h" #include "gcscan.h" #include "gcdesc.h" +#include "softwarewritewatch.h" #ifdef SERVER_GC #undef SERVER_GC diff --git a/src/gc/sample/GCSample.cpp b/src/gc/sample/GCSample.cpp index 664dc38e9460..112d291420cb 100644 --- a/src/gc/sample/GCSample.cpp +++ b/src/gc/sample/GCSample.cpp @@ -94,14 +94,11 @@ inline void ErectWriteBarrier(Object ** dst, Object * ref) if (((uint8_t*)dst < g_gc_lowest_address) || ((uint8_t*)dst >= g_gc_highest_address)) return; - if((uint8_t*)ref >= g_gc_ephemeral_low && (uint8_t*)ref < g_gc_ephemeral_high) - { - // volatile is used here to prevent fetch of g_card_table from being reordered - // with g_lowest/highest_address check above. See comment in code:gc_heap::grow_brick_card_tables. - uint8_t* pCardByte = (uint8_t *)*(volatile uint8_t **)(&g_gc_card_table) + card_byte((uint8_t *)dst); - if(*pCardByte != 0xFF) - *pCardByte = 0xFF; - } + // volatile is used here to prevent fetch of g_card_table from being reordered + // with g_lowest/highest_address check above. See comment in code:gc_heap::grow_brick_card_tables. + uint8_t* pCardByte = (uint8_t *)*(volatile uint8_t **)(&g_gc_card_table) + card_byte((uint8_t *)dst); + if(*pCardByte != 0xFF) + *pCardByte = 0xFF; } void WriteBarrier(Object ** dst, Object * ref) diff --git a/src/gc/softwarewritewatch.cpp b/src/gc/softwarewritewatch.cpp index fa14a0489734..b85293857ab5 100644 --- a/src/gc/softwarewritewatch.cpp +++ b/src/gc/softwarewritewatch.cpp @@ -3,10 +3,9 @@ // See the LICENSE file in the project root for more information. 
#include "common.h" -#include "softwarewritewatch.h" - #include "gcenv.h" #include "env/gcenv.os.h" +#include "softwarewritewatch.h" #ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP #ifndef DACCESS_COMPILE @@ -15,8 +14,8 @@ static_assert((static_cast(1) << SOFTWARE_WRITE_WATCH_AddressToTableByte extern "C" { - uint8_t *g_sw_ww_table = nullptr; - bool g_sw_ww_enabled_for_gc_heap = false; + uint8_t *g_gc_sw_ww_table = nullptr; + bool g_gc_sw_ww_enabled_for_gc_heap = false; } void SoftwareWriteWatch::StaticClose() @@ -26,8 +25,8 @@ void SoftwareWriteWatch::StaticClose() return; } - g_sw_ww_enabled_for_gc_heap = false; - g_sw_ww_table = nullptr; + g_gc_sw_ww_enabled_for_gc_heap = false; + g_gc_sw_ww_table = nullptr; } bool SoftwareWriteWatch::GetDirtyFromBlock( diff --git a/src/gc/softwarewritewatch.h b/src/gc/softwarewritewatch.h index 3c8491cecbb4..0e6e6c8191d8 100644 --- a/src/gc/softwarewritewatch.h +++ b/src/gc/softwarewritewatch.h @@ -5,25 +5,20 @@ #ifndef __SOFTWARE_WRITE_WATCH_H__ #define __SOFTWARE_WRITE_WATCH_H__ +#include "gcinterface.h" +#include "gc.h" + #ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP #ifndef DACCESS_COMPILE -extern void SwitchToWriteWatchBarrier(bool isRuntimeSuspended); -extern void SwitchToNonWriteWatchBarrier(bool isRuntimeSuspended); - -#define SOFTWARE_WRITE_WATCH_AddressToTableByteIndexShift 0xc - extern "C" { // Table containing the dirty state. This table is translated to exclude the lowest address it represents, see // TranslateTableToExcludeHeapStartAddress. - extern uint8_t *g_sw_ww_table; + extern uint8_t *g_gc_sw_ww_table; // Write watch may be disabled when it is not needed (between GCs for instance). This indicates whether it is enabled. 
- extern bool g_sw_ww_enabled_for_gc_heap; - - extern uint8_t *g_lowest_address; // start address of the GC heap - extern uint8_t *g_highest_address; // end address of the GC heap + extern bool g_gc_sw_ww_enabled_for_gc_heap; } class SoftwareWriteWatch @@ -116,7 +111,7 @@ inline void SoftwareWriteWatch::VerifyMemoryRegion( inline uint8_t *SoftwareWriteWatch::GetTable() { - return g_sw_ww_table; + return g_gc_sw_ww_table; } inline uint8_t *SoftwareWriteWatch::GetUntranslatedTable() @@ -163,7 +158,7 @@ inline void SoftwareWriteWatch::SetUntranslatedTable(uint8_t *untranslatedTable, assert(ALIGN_DOWN(untranslatedTable, sizeof(size_t)) == untranslatedTable); assert(heapStartAddress != nullptr); - g_sw_ww_table = TranslateTableToExcludeHeapStartAddress(untranslatedTable, heapStartAddress); + g_gc_sw_ww_table = TranslateTableToExcludeHeapStartAddress(untranslatedTable, heapStartAddress); } inline void SoftwareWriteWatch::SetResizedUntranslatedTable( @@ -194,7 +189,7 @@ inline void SoftwareWriteWatch::SetResizedUntranslatedTable( inline bool SoftwareWriteWatch::IsEnabledForGCHeap() { - return g_sw_ww_enabled_for_gc_heap; + return g_gc_sw_ww_enabled_for_gc_heap; } inline void SoftwareWriteWatch::EnableForGCHeap() @@ -204,9 +199,13 @@ inline void SoftwareWriteWatch::EnableForGCHeap() VerifyCreated(); assert(!IsEnabledForGCHeap()); + g_gc_sw_ww_enabled_for_gc_heap = true; - g_sw_ww_enabled_for_gc_heap = true; - SwitchToWriteWatchBarrier(true); + WriteBarrierParameters args = {}; + args.operation = WriteBarrierOp::SwitchToWriteWatch; + args.write_watch_table = g_gc_sw_ww_table; + args.is_runtime_suspended = true; + GCToEEInterface::StompWriteBarrier(&args); } inline void SoftwareWriteWatch::DisableForGCHeap() @@ -216,19 +215,22 @@ inline void SoftwareWriteWatch::DisableForGCHeap() VerifyCreated(); assert(IsEnabledForGCHeap()); + g_gc_sw_ww_enabled_for_gc_heap = false; - g_sw_ww_enabled_for_gc_heap = false; - SwitchToNonWriteWatchBarrier(true); + WriteBarrierParameters args = 
{}; + args.operation = WriteBarrierOp::SwitchToNonWriteWatch; + args.is_runtime_suspended = true; + GCToEEInterface::StompWriteBarrier(&args); } inline void *SoftwareWriteWatch::GetHeapStartAddress() { - return g_lowest_address; + return g_gc_lowest_address; } inline void *SoftwareWriteWatch::GetHeapEndAddress() { - return g_highest_address; + return g_gc_highest_address; } inline size_t SoftwareWriteWatch::GetTableByteIndex(void *address) diff --git a/src/vm/amd64/jitinterfaceamd64.cpp b/src/vm/amd64/jitinterfaceamd64.cpp index d5dec8e6e8b4..53d8f74f1bc7 100644 --- a/src/vm/amd64/jitinterfaceamd64.cpp +++ b/src/vm/amd64/jitinterfaceamd64.cpp @@ -16,7 +16,6 @@ #include "eeconfig.h" #include "excep.h" #include "threadsuspend.h" -#include "../../gc/softwarewritewatch.h" extern uint8_t* g_ephemeral_low; extern uint8_t* g_ephemeral_high; @@ -532,9 +531,9 @@ void WriteBarrierManager::UpdateWriteWatchAndCardTableLocations(bool isRuntimeSu #ifdef FEATURE_SVR_GC case WRITE_BARRIER_WRITE_WATCH_SVR64: #endif // FEATURE_SVR_GC - if (*(UINT64*)m_pWriteWatchTableImmediate != (size_t)SoftwareWriteWatch::GetTable()) + if (*(UINT64*)m_pWriteWatchTableImmediate != (size_t)g_sw_ww_table) { - *(UINT64*)m_pWriteWatchTableImmediate = (size_t)SoftwareWriteWatch::GetTable(); + *(UINT64*)m_pWriteWatchTableImmediate = (size_t)g_sw_ww_table; fFlushCache = true; } break; diff --git a/src/vm/gcenv.ee.cpp b/src/vm/gcenv.ee.cpp index 3be8384b3df5..5fb83bfa041c 100644 --- a/src/vm/gcenv.ee.cpp +++ b/src/vm/gcenv.ee.cpp @@ -1226,6 +1226,14 @@ void GCToEEInterface::StompWriteBarrier(WriteBarrierParameters* args) assert(args->lowest_address != nullptr); assert(args->highest_address != nullptr); g_card_table = args->card_table; +#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + if (args->write_watch_table != nullptr) + { + assert(args->is_runtime_suspended); + g_sw_ww_table = args->write_watch_table; + } +#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + 
::StompWriteBarrierResize(args->is_runtime_suspended, args->requires_upper_bounds_check); // We need to make sure that other threads executing checked write barriers @@ -1241,10 +1249,10 @@ void GCToEEInterface::StompWriteBarrier(WriteBarrierParameters* args) return; case WriteBarrierOp::StompEphemeral: // StompEphemeral requires a new ephemeral low and a new ephemeral high - assert(args->ephemeral_lo != nullptr); - assert(args->ephemeral_hi != nullptr); - g_ephemeral_low = args->ephemeral_lo; - g_ephemeral_high = args->ephemeral_hi; + assert(args->ephemeral_low != nullptr); + assert(args->ephemeral_high != nullptr); + g_ephemeral_low = args->ephemeral_low; + g_ephemeral_high = args->ephemeral_high; ::StompWriteBarrierEphemeral(args->is_runtime_suspended); return; case WriteBarrierOp::Initialize: @@ -1255,6 +1263,8 @@ void GCToEEInterface::StompWriteBarrier(WriteBarrierParameters* args) assert(args->card_table != nullptr); assert(args->lowest_address != nullptr); assert(args->highest_address != nullptr); + assert(args->ephemeral_low != nullptr); + assert(args->ephemeral_high != nullptr); assert(args->is_runtime_suspended && "the runtime must be suspended here!"); assert(!args->requires_upper_bounds_check && "the ephemeral generation must be at the top of the heap!"); @@ -1263,6 +1273,31 @@ void GCToEEInterface::StompWriteBarrier(WriteBarrierParameters* args) g_lowest_address = args->lowest_address; VolatileStore(&g_highest_address, args->highest_address); ::StompWriteBarrierResize(true, false); + + // g_ephemeral_low/high aren't needed for the write barrier stomp, but they + // are needed in other places. 
+ g_ephemeral_low = args->ephemeral_low; + g_ephemeral_high = args->ephemeral_high; + return; + case WriteBarrierOp::SwitchToWriteWatch: +#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + assert(args->write_watch_table != nullptr); + assert(args->is_runtime_suspended && "the runtime must be suspended here!"); + g_sw_ww_table = args->write_watch_table; + g_sw_ww_enabled_for_gc_heap = true; + ::SwitchToWriteWatchBarrier(true); +#else + assert(!"should never be called without FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP"); +#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + return; + case WriteBarrierOp::SwitchToNonWriteWatch: +#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + assert(args->is_runtime_suspended && "the runtime must be suspended here!"); + g_sw_ww_enabled_for_gc_heap = false; + ::SwitchToNonWriteWatchBarrier(true); +#else + assert(!"should never be called without FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP"); +#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP return; default: assert(!"unknown WriteBarrierOp enum"); diff --git a/src/vm/gcenv.h b/src/vm/gcenv.h index ad5baa262e73..2e23b270b5dc 100644 --- a/src/vm/gcenv.h +++ b/src/vm/gcenv.h @@ -51,8 +51,6 @@ #include "gcenv.interlocked.h" #include "gcenv.interlocked.inl" -#include "../gc/softwarewritewatch.h" - namespace ETW { typedef enum _GC_ROOT_KIND { diff --git a/src/vm/gcheaputilities.cpp b/src/vm/gcheaputilities.cpp index ac24fa34cefe..91f259d275da 100644 --- a/src/vm/gcheaputilities.cpp +++ b/src/vm/gcheaputilities.cpp @@ -16,4 +16,11 @@ uint8_t* g_ephemeral_low = (uint8_t*)1; uint8_t* g_ephemeral_high = (uint8_t*)~0; // This is the global GC heap, maintained by the VM. 
-GPTR_IMPL(IGCHeap, g_pGCHeap); \ No newline at end of file +GPTR_IMPL(IGCHeap, g_pGCHeap); + +#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + +uint8_t* g_sw_ww_table = nullptr; +bool g_sw_ww_enabled_for_gc_heap = false; + +#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP \ No newline at end of file diff --git a/src/vm/gcheaputilities.h b/src/vm/gcheaputilities.h index e5883fc9192a..e76a21173cfd 100644 --- a/src/vm/gcheaputilities.h +++ b/src/vm/gcheaputilities.h @@ -10,6 +10,31 @@ // The singular heap instance. GPTR_DECL(IGCHeap, g_pGCHeap); +#ifndef DACCESS_COMPILE +extern "C" { +#endif // !DACCESS_COMPILE +GPTR_DECL(uint8_t,g_lowest_address); +GPTR_DECL(uint8_t,g_highest_address); +GPTR_DECL(uint32_t,g_card_table); +#ifndef DACCESS_COMPILE +} +#endif // !DACCESS_COMPILE + +extern "C" uint8_t* g_ephemeral_low; +extern "C" uint8_t* g_ephemeral_high; + +#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + +// Table containing the dirty state. This table is translated to exclude the lowest address it represents, see +// TranslateTableToExcludeHeapStartAddress. +extern "C" uint8_t *g_sw_ww_table; + +// Write watch may be disabled when it is not needed (between GCs for instance). This indicates whether it is enabled. +extern "C" bool g_sw_ww_enabled_for_gc_heap; + +#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + + // GCHeapUtilities provides a number of static methods // that operate on the global heap instance. It can't be // instantiated. @@ -108,22 +133,74 @@ class GCHeapUtilities { return IGCHeap::maxGeneration; } +#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + + // Returns True if software write watch is currently enabled for the GC Heap, + // or False if it is not. + inline static bool SoftwareWriteWatchIsEnabled() + { + WRAPPER_NO_CONTRACT; + + return g_sw_ww_enabled_for_gc_heap; + } + + // In accordance with the SoftwareWriteWatch scheme, marks a given address as + // "dirty" (e.g. has been written to). 
+ inline static void SoftwareWriteWatchSetDirty(void* address, size_t write_size) + { + LIMITED_METHOD_CONTRACT; + + // We presumably have just written something to this address, so it can't be null. + assert(address != nullptr); + + // The implementation is limited to writes of a pointer size or less. Writes larger + // than pointer size may cross page boundaries and would require us to potentially + // set more than one entry in the SWW table, which can't be done atomically under + // the current scheme. + assert(write_size <= sizeof(void*)); + + size_t table_byte_index = reinterpret_cast<size_t>(address) >> SOFTWARE_WRITE_WATCH_AddressToTableByteIndexShift; + + // The table byte index that we calculate for the address should be the same as the one + // calculated for a pointer to the end of the written region. If this were not the case, + // this write crossed a boundary and would dirty two pages. + uint8_t* end_of_write_ptr = reinterpret_cast<uint8_t*>(address) + (write_size - 1); + assert(table_byte_index == reinterpret_cast<size_t>(end_of_write_ptr) >> SOFTWARE_WRITE_WATCH_AddressToTableByteIndexShift); + uint8_t* table_address = &g_sw_ww_table[table_byte_index]; + if (*table_address == 0) + { + *table_address = 0xFF; + } + } + + // In accordance with the SoftwareWriteWatch scheme, marks a range of addresses + // as dirty, starting at the given address and with the given length. + inline static void SoftwareWriteWatchSetDirtyRegion(void* address, size_t length) + { + LIMITED_METHOD_CONTRACT; + + // We presumably have just memcopied something to this address, so it can't be null. + assert(address != nullptr); + + // The "base index" is the first index in the SWW table that covers the target + // region of memory. + size_t base_index = reinterpret_cast<size_t>(address) >> SOFTWARE_WRITE_WATCH_AddressToTableByteIndexShift; + + // The "end_index" is the last index in the SWW table that covers the target + // region of memory. 
+ uint8_t* end_pointer = reinterpret_cast<uint8_t*>(address) + length - 1; + size_t end_index = reinterpret_cast<size_t>(end_pointer) >> SOFTWARE_WRITE_WATCH_AddressToTableByteIndexShift; + + // We'll mark the entire region of memory as dirty by memseting all entries in + // the SWW table between the start and end indexes. + memset(&g_sw_ww_table[base_index], ~0, end_index - base_index + 1); + } +#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + + private: // This class should never be instantiated. GCHeapUtilities() = delete; }; -#ifndef DACCESS_COMPILE -extern "C" { -#endif // !DACCESS_COMPILE -GPTR_DECL(uint8_t,g_lowest_address); -GPTR_DECL(uint8_t,g_highest_address); -GPTR_DECL(uint32_t,g_card_table); -#ifndef DACCESS_COMPILE -} -#endif // !DACCESS_COMPILE - -extern "C" uint8_t* g_ephemeral_low; -extern "C" uint8_t* g_ephemeral_high; - #endif // _GCHEAPUTILITIES_H_ \ No newline at end of file diff --git a/src/vm/gchelpers.cpp b/src/vm/gchelpers.cpp index 20a3a2954063..30f6dd0c8180 100644 --- a/src/vm/gchelpers.cpp +++ b/src/vm/gchelpers.cpp @@ -35,7 +35,6 @@ #endif // FEATURE_COMINTEROP #include "rcwwalker.h" -#include "../gc/softwarewritewatch.h" //======================================================================== // @@ -1241,9 +1240,9 @@ extern "C" HCIMPL2_RAW(VOID, JIT_CheckedWriteBarrier, Object **dst, Object *ref) #endif #ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP - if (SoftwareWriteWatch::IsEnabledForGCHeap()) + if (GCHeapUtilities::SoftwareWriteWatchIsEnabled()) { - SoftwareWriteWatch::SetDirty(dst, sizeof(*dst)); + GCHeapUtilities::SoftwareWriteWatchSetDirty(dst, sizeof(*dst)); } #endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP @@ -1298,9 +1297,9 @@ extern "C" HCIMPL2_RAW(VOID, JIT_WriteBarrier, Object **dst, Object *ref) #endif #ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP - if (SoftwareWriteWatch::IsEnabledForGCHeap()) + if (GCHeapUtilities::SoftwareWriteWatchIsEnabled()) { - SoftwareWriteWatch::SetDirty(dst, sizeof(*dst)); + 
GCHeapUtilities::SoftwareWriteWatchSetDirty(dst, sizeof(*dst)); } #endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP @@ -1366,9 +1365,9 @@ void ErectWriteBarrier(OBJECTREF *dst, OBJECTREF ref) #endif #ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP - if (SoftwareWriteWatch::IsEnabledForGCHeap()) + if (GCHeapUtilities::SoftwareWriteWatchIsEnabled()) { - SoftwareWriteWatch::SetDirty(dst, sizeof(*dst)); + GCHeapUtilities::SoftwareWriteWatchSetDirty(dst, sizeof(*dst)); } #endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP @@ -1399,10 +1398,11 @@ void ErectWriteBarrierForMT(MethodTable **dst, MethodTable *ref) if (ref->Collectible()) { #ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP - if (SoftwareWriteWatch::IsEnabledForGCHeap()) + if (GCHeapUtilities::SoftwareWriteWatchIsEnabled()) { - SoftwareWriteWatch::SetDirty(dst, sizeof(*dst)); + GCHeapUtilities::SoftwareWriteWatchSetDirty(dst, sizeof(*dst)); } + #endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP BYTE *refObject = *(BYTE **)((MethodTable*)ref)->GetLoaderAllocatorObjectHandle(); @@ -1417,3 +1417,84 @@ void ErectWriteBarrierForMT(MethodTable **dst, MethodTable *ref) } } } + +//---------------------------------------------------------------------------- +// +// Write Barrier Support for bulk copy ("Clone") operations +// +// StartPoint is the target bulk copy start point +// len is the length of the bulk copy (in bytes) +// +// +// Performance Note: +// +// This is implemented somewhat "conservatively", that is we +// assume that all the contents of the bulk copy are object +// references. If they are not, and the value lies in the +// ephemeral range, we will set false positives in the card table. 
+// +// We could use the pointer maps and do this more accurately if necessary + +#if defined(_MSC_VER) && defined(_TARGET_X86_) +#pragma optimize("y", on) // Small critical routines, don't put in EBP frame +#endif //_MSC_VER && _TARGET_X86_ + +void +SetCardsAfterBulkCopy(Object **start, size_t len) +{ + // Check whether the writes were even into the heap. If not there's no card update required. + // Also if the size is smaller than a pointer, no write barrier is required. + if ((BYTE*)start < g_lowest_address || (BYTE*)start >= g_highest_address || len < sizeof(uintptr_t)) + { + return; + } + + + // Don't optimize the Generation 0 case if we are checking for write barrier violations + // since we need to update the shadow heap even in the generation 0 case. +#if defined (WRITE_BARRIER_CHECK) && !defined (SERVER_GC) + if (g_pConfig->GetHeapVerifyLevel() & EEConfig::HEAPVERIFY_BARRIERCHECK) + { + for(unsigned i=0; i < len / sizeof(Object*); i++) + { + updateGCShadow(&start[i], start[i]); + } + } +#endif //WRITE_BARRIER_CHECK && !SERVER_GC + +#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + if (GCHeapUtilities::SoftwareWriteWatchIsEnabled()) + { + GCHeapUtilities::SoftwareWriteWatchSetDirtyRegion(start, len); + } +#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + + size_t startAddress = (size_t)start; + size_t endAddress = startAddress + len; + size_t startingClump = startAddress >> card_byte_shift; + size_t endingClump = (endAddress + (1 << card_byte_shift) - 1) >> card_byte_shift; + + // calculate the number of clumps to mark (round_up(end) - start) + size_t clumpCount = endingClump - startingClump; + // VolatileLoadWithoutBarrier() is used here to prevent fetch of g_card_table from being reordered + // with g_lowest/highest_address check at the beginning of this function. + uint8_t* card = ((uint8_t*)VolatileLoadWithoutBarrier(&g_card_table)) + startingClump; + + // Fill the cards. 
To avoid cache line thrashing we check whether the cards have already been set before + // writing. + do + { + if (*card != 0xff) + { + *card = 0xff; + } + + card++; + clumpCount--; + } + while (clumpCount != 0); +} + +#if defined(_MSC_VER) && defined(_TARGET_X86_) +#pragma optimize("", on) // Go back to command line default optimizations +#endif //_MSC_VER && _TARGET_X86_ \ No newline at end of file diff --git a/src/vm/gchelpers.h b/src/vm/gchelpers.h index f5590beebef5..449524aa9a5c 100644 --- a/src/vm/gchelpers.h +++ b/src/vm/gchelpers.h @@ -109,6 +109,8 @@ OBJECTREF AllocateObject(MethodTable *pMT extern void StompWriteBarrierEphemeral(bool isRuntimeSuspended); extern void StompWriteBarrierResize(bool isRuntimeSuspended, bool bReqUpperBoundsCheck); +extern void SwitchToWriteWatchBarrier(bool isRuntimeSuspended); +extern void SwitchToNonWriteWatchBarrier(bool isRuntimeSuspended); extern void ThrowOutOfMemoryDimensionsExceeded(); @@ -119,5 +121,5 @@ extern void ThrowOutOfMemoryDimensionsExceeded(); //======================================================================== void ErectWriteBarrier(OBJECTREF* dst, OBJECTREF ref); - +void SetCardsAfterBulkCopy(Object **start, size_t len); #endif // _GCHELPERS_H_