Decommit region tails #66008

Merged: 10 commits, Mar 7, 2022
157 changes: 116 additions & 41 deletions src/coreclr/gc/gc.cpp
@@ -11545,6 +11545,7 @@ void gc_heap::decommit_heap_segment_pages (heap_segment* seg,
{
if (use_large_pages_p)
return;

uint8_t* page_start = align_on_page (heap_segment_allocated(seg));
assert (heap_segment_committed (seg) >= page_start);

@@ -11560,12 +11561,6 @@
size_t gc_heap::decommit_heap_segment_pages_worker (heap_segment* seg,
uint8_t* new_committed)
{
#ifdef USE_REGIONS
if (!dt_high_memory_load_p())
{
return 0;
}
#endif
assert (!use_large_pages_p);
uint8_t* page_start = align_on_page (new_committed);
ptrdiff_t size = heap_segment_committed (seg) - page_start;
@@ -12350,8 +12345,7 @@ void gc_heap::distribute_free_regions()
heap_budget_in_region_units[i][large_free_region] = 0;
for (int gen = soh_gen0; gen < total_generation_count; gen++)
{
ptrdiff_t budget_gen = hp->estimate_gen_growth (gen);
assert (budget_gen >= 0);
ptrdiff_t budget_gen = max (hp->estimate_gen_growth (gen), 0);
int kind = gen >= loh_generation;
size_t budget_gen_in_region_units = (budget_gen + (region_size[kind] - 1)) / region_size[kind];
dprintf (REGIONS_LOG, ("h%2d gen %d has an estimated growth of %Id bytes (%Id regions)", i, gen, budget_gen, budget_gen_in_region_units));
@@ -12498,7 +12492,6 @@ void gc_heap::distribute_free_regions()
}

#ifdef MULTIPLE_HEAPS
gradual_decommit_in_progress_p = FALSE;
for (int kind = basic_free_region; kind < count_free_region_kinds; kind++)
{
if (global_regions_to_decommit[kind].get_num_free_regions() != 0)
@@ -22141,7 +22134,7 @@ void gc_heap::garbage_collect (int n)
}

descr_generations ("BEGIN");
#ifdef TRACE_GC
#if defined(TRACE_GC) && defined(USE_REGIONS)
if (heap_number == 0)
{
#ifdef MULTIPLE_HEAPS
@@ -22165,7 +22158,7 @@
}
}
}
#endif // TRACE_GC
#endif // TRACE_GC && USE_REGIONS

#ifdef VERIFY_HEAP
if ((GCConfig::GetHeapVerifyLevel() & GCConfig::HEAPVERIFY_GC) &&
@@ -30220,7 +30213,7 @@ heap_segment* gc_heap::find_first_valid_region (heap_segment* region, bool compa
set_region_plan_gen_num (current_region, plan_gen_num);
}

if (gen_num != 0)
if (gen_num >= soh_gen2)
{
dprintf (REGIONS_LOG, (" gen%d decommit end of region %Ix(%Ix)",
gen_num, current_region, heap_segment_mem (current_region)));
@@ -39561,7 +39554,7 @@ ptrdiff_t gc_heap::estimate_gen_growth (int gen_number)
gen_number, heap_number, budget_gen, new_allocation_gen, free_list_space_gen));
#endif //USE_REGIONS

return max(0, budget_gen);
return budget_gen;
}

void gc_heap::decommit_ephemeral_segment_pages()
@@ -39572,14 +39565,65 @@ void gc_heap::decommit_ephemeral_segment_pages()
}

#if defined(MULTIPLE_HEAPS) && defined(USE_REGIONS)
// for regions, this is done at the regions level
return;
for (int gen_number = soh_gen0; gen_number <= soh_gen1; gen_number++)
{
generation *gen = generation_of (gen_number);
heap_segment* tail_region = generation_tail_region (gen);
uint8_t* previous_decommit_target = heap_segment_decommit_target (tail_region);

@Maoni0 (Member), Mar 2, 2022:

if we are not even doing a gen1 GC, would we need to go up to soh_gen1 here?

if we are doing a gen1 GC we would have called decommit_heap_segment_pages and decommitted (almost) everything after heap_segment_allocated for gen1 regions -

            if (gen_num != 0)
            {
                dprintf (REGIONS_LOG, ("  gen%d decommit end of region %Ix(%Ix)",
                    gen_num, current_region, heap_segment_mem (current_region)));
                decommit_heap_segment_pages (current_region, 0);
            }

if dt_high_memory_load_p() is true. so you could avoid doing the gen1 region in that case. however, I'm also fine if you want to avoid complicating this code path here.

Member:

now we are not decommitting end of region space for gen2 since the decommit only happens when memory load is high; this could still make a difference for small benchmarks (as in, regions would use more memory)

Contributor (Author):

There are two reasons for considering the situation in soh_gen1 even if we are not doing a gen 1 GC:

  • The free list situation in gen 1 could have changed.
  • We are smoothing the ramp down (see the sketch after this comment), and gen 1 GCs may happen too infrequently. The alternative would be to not do the smoothing, but I think that would likely perform worse in cases where the workload changes behavior.

We need to consider what to do with gen 2 (and UOH as well), agreed. This should be a separate PR, I think.
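The smoothing mentioned in the second bullet is the update visible later in the diff: when the newly computed target is below the previous one, the target only moves a third of the way down per GC. A small numeric sketch of that update rule (plain integers standing in for the pointer values; hypothetical function name):

    #include <cstdint>
    #include <cstdio>

    // effectively new_target = 1/3 * proposed + 2/3 * previous, written as an
    // increment on the difference so the intermediate product stays small
    static uint64_t smooth_target (uint64_t proposed, uint64_t previous)
    {
        if (proposed >= previous)
            return proposed;        // target moved up - take it directly
        uint64_t decrease = previous - proposed;
        return proposed + decrease * 2 / 3;
    }

    int main ()
    {
        uint64_t target = 900;
        // a proposed target of 300 is approached gradually: 700, 566, 477, ...
        for (int i = 0; i < 5; i++)
        {
            target = smooth_target (300, target);
            printf ("%llu\n", (unsigned long long)target);
        }
        return 0;
    }

Each GC therefore ramps the decommit target down gradually instead of oscillating with short-lived budget changes.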

Member:

actually I think we should keep the behavior of decommit_heap_segment_pages for now, for gen2/UOH - there's no reason not to because that's only done during a full blocking GC, and it's the same behavior for segments. we can make a separate PR to improve this behavior.

// reset the decommit targets to make sure we don't decommit inadvertently
for (heap_segment* region = generation_start_segment_rw (gen); region != nullptr; region = heap_segment_next (region))
{
heap_segment_decommit_target (region) = heap_segment_reserved (region);
}

ptrdiff_t budget_gen = estimate_gen_growth (gen_number) + loh_size_threshold;

if (budget_gen >= 0)
{
// we need more than the regions we have - nothing to decommit
continue;
}

// we may have too much committed - let's see if we can decommit in the tail region
ptrdiff_t tail_region_size = heap_segment_reserved (tail_region) - heap_segment_mem (tail_region);
ptrdiff_t unneeded_tail_size = min (-budget_gen, tail_region_size);
uint8_t *decommit_target = heap_segment_reserved (tail_region) - unneeded_tail_size;
decommit_target = max (decommit_target, heap_segment_allocated (tail_region));

if (decommit_target < previous_decommit_target)
{
// we used to have a higher target - do exponential smoothing by computing
// essentially decommit_target = 1/3*decommit_target + 2/3*previous_decommit_target
// computation below is slightly different to avoid overflow
ptrdiff_t target_decrease = previous_decommit_target - decommit_target;
decommit_target += target_decrease * 2 / 3;
}

heap_segment_decommit_target (tail_region) = decommit_target;

if (decommit_target < heap_segment_committed (tail_region))
{
gradual_decommit_in_progress_p = TRUE;

dprintf (1, ("h%2d gen %d reduce_commit by %IdkB",
heap_number,
gen_number,
(heap_segment_committed (tail_region) - decommit_target)/1024));
}
dprintf(3, ("h%2d gen %d allocated: %IdkB committed: %IdkB target: %IdkB",
heap_number,
gen_number,
(heap_segment_allocated (tail_region) - heap_segment_mem (tail_region))/1024,
(heap_segment_committed (tail_region) - heap_segment_mem (tail_region))/1024,
(decommit_target - heap_segment_mem (tail_region))/1024));
}
#else //MULTIPLE_HEAPS && USE_REGIONS

dynamic_data* dd0 = dynamic_data_of (0);

ptrdiff_t desired_allocation = dd_new_allocation (dd0) +
estimate_gen_growth (soh_gen1) +
max (estimate_gen_growth (soh_gen1), 0) +
loh_size_threshold;

size_t slack_space =
@@ -39686,7 +39730,7 @@ bool gc_heap::decommit_step ()
}
}
}
#else //USE_REGIONS
#endif //USE_REGIONS
#ifdef MULTIPLE_HEAPS
// should never get here for large pages because decommit_ephemeral_segment_pages
// will not do anything if use_large_pages_p is true
@@ -39698,46 +39742,77 @@
decommit_size += hp->decommit_ephemeral_segment_pages_step ();
}
#endif //MULTIPLE_HEAPS
#endif //USE_REGIONS
return (decommit_size != 0);
}

#ifdef MULTIPLE_HEAPS
// return the decommitted size
#ifndef USE_REGIONS
size_t gc_heap::decommit_ephemeral_segment_pages_step ()
{
// we rely on desired allocation not being changed outside of GC
assert (ephemeral_heap_segment->saved_desired_allocation == dd_desired_allocation (dynamic_data_of (0)));

uint8_t* decommit_target = heap_segment_decommit_target (ephemeral_heap_segment);
size_t EXTRA_SPACE = 2 * OS_PAGE_SIZE;
decommit_target += EXTRA_SPACE;
uint8_t* committed = heap_segment_committed (ephemeral_heap_segment);
if (decommit_target < committed)
size_t size = 0;
#ifdef USE_REGIONS
for (int gen_number = soh_gen0; gen_number <= soh_gen1; gen_number++)
{
generation* gen = generation_of (gen_number);
heap_segment* seg = generation_tail_region (gen);
#else // USE_REGIONS
{
// we rely on other threads not messing with committed if we are about to trim it down
assert (ephemeral_heap_segment->saved_committed == heap_segment_committed (ephemeral_heap_segment));
heap_segment* seg = ephemeral_heap_segment;
// we rely on desired allocation not being changed outside of GC
assert (seg->saved_desired_allocation == dd_desired_allocation (dynamic_data_of (0)));
#endif // USE_REGIONS

// how much would we need to decommit to get to decommit_target in one step?
size_t full_decommit_size = (committed - decommit_target);
uint8_t* decommit_target = heap_segment_decommit_target (seg);
size_t EXTRA_SPACE = 2 * OS_PAGE_SIZE;
decommit_target += EXTRA_SPACE;
uint8_t* committed = heap_segment_committed (seg);
Member:

I would not take the msl if gen_number is soh_gen1 since it's unnecessary. taking the msl here is kind of worrisome for perf - this thread is running at THREAD_PRIORITY_HIGHEST which means it will actually likely keep running till it's done with all the decommit work here (this also means the decommit for regions on the decommit list might occupy heap0's core for quite some time before it can be used for anything else). while lower priority threads can run on other cores (and most likely will get to), it's still good to be conscientious about not holding up one core for too long. we do call SetThreadIdealProcessorEx on heap0's proc when we want to allocate on heap0 but that's just the ideal.

Contributor (Author):

Ok, I skipped taking the msl lock for soh_gen1.

Regarding tying up GC thread 0, hopefully we won't be tying it up for very long, because we limit the amount of decommit to max_decommit_step_size.
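The cap the author mentions bounds how much one pass can decommit, so a large reduction is spread over repeated decommit_step calls. A self-contained sketch of that shape (hypothetical cap value and a stub in place of the real OS call):

    #include <algorithm>
    #include <cstddef>
    #include <cstdio>

    static const size_t max_decommit_step_size = 8u << 20; // hypothetical 8 MB cap

    // stub standing in for the actual decommit of a region tail
    static void os_decommit (size_t bytes) { printf ("decommit %zu MB\n", bytes >> 20); }

    int main ()
    {
        size_t committed = 100u << 20; // currently committed
        size_t target    = 70u << 20;  // desired committed size
        while (committed > target)     // one iteration per decommit_step call
        {
            size_t step = std::min (max_decommit_step_size, committed - target);
            os_decommit (step);
            committed -= step;
        }
        // prints: 8 MB three times, then 6 MB
        return 0;
    }

Because each pass is bounded this way, GC thread 0 gives up the core after a limited amount of decommit work per step.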

uint8_t* allocated = heap_segment_allocated (seg);
if ((allocated <= decommit_target) && (decommit_target < committed))
{
#ifdef USE_REGIONS
if (gen_number == soh_gen0)
{
// for gen 0, sync with the allocator by taking the more space lock
// and re-read the variables
enter_spin_lock (&more_space_lock_soh);
add_saved_spinlock_info (false, me_acquire, mt_decommit_step);
decommit_target = heap_segment_decommit_target (seg);
decommit_target += EXTRA_SPACE;
committed = heap_segment_committed (seg);
allocated = heap_segment_allocated (seg);
}
if ((allocated <= decommit_target) && (decommit_target < committed))
#else // USE_REGIONS
// we rely on other threads not messing with committed if we are about to trim it down
assert (seg->saved_committed == heap_segment_committed (seg));
#endif // USE_REGIONS
{
// how much would we need to decommit to get to decommit_target in one step?
size_t full_decommit_size = (committed - decommit_target);

// don't do more than max_decommit_step_size per step
size_t decommit_size = min (max_decommit_step_size, full_decommit_size);
// don't do more than max_decommit_step_size per step
size_t decommit_size = min (max_decommit_step_size, full_decommit_size);

// figure out where the new committed should be
uint8_t* new_committed = (committed - decommit_size);
size_t size = decommit_heap_segment_pages_worker (ephemeral_heap_segment, new_committed);
// figure out where the new committed should be
uint8_t* new_committed = (committed - decommit_size);
size += decommit_heap_segment_pages_worker (seg, new_committed);

#ifdef _DEBUG
ephemeral_heap_segment->saved_committed = committed - size;
seg->saved_committed = committed - size;
#endif // _DEBUG

return size;
}
#ifdef USE_REGIONS
if (gen_number == soh_gen0)
{
// for gen 0, we took the more space lock - leave it again
add_saved_spinlock_info (false, me_release, mt_decommit_step);
leave_spin_lock (&more_space_lock_soh);
}
#endif // USE_REGIONS
}
}
return 0;
return size;
}
#endif //!USE_REGIONS
#endif //MULTIPLE_HEAPS

//This is meant to be called by decide_on_compacting.
11 changes: 4 additions & 7 deletions src/coreclr/gc/gcpriv.h
@@ -1036,7 +1036,8 @@ enum msl_take_state
mt_alloc_large_cant,
mt_try_alloc,
mt_try_budget,
mt_try_servo_budget
mt_try_servo_budget,
mt_decommit_step
};

enum msl_enter_state
@@ -2012,10 +2013,10 @@ class gc_heap
void reset_heap_segment_pages (heap_segment* seg);
PER_HEAP
void decommit_heap_segment_pages (heap_segment* seg, size_t extra_space);
#if defined(MULTIPLE_HEAPS) && !defined(USE_REGIONS)
#if defined(MULTIPLE_HEAPS)
PER_HEAP
size_t decommit_ephemeral_segment_pages_step ();
#endif //MULTIPLE_HEAPS && !USE_REGIONS
#endif //MULTIPLE_HEAPS
PER_HEAP
size_t decommit_heap_segment_pages_worker (heap_segment* seg, uint8_t *new_committed);
PER_HEAP_ISOLATED
@@ -5596,9 +5597,7 @@ class heap_segment
size_t saved_desired_allocation;
#endif // _DEBUG
#endif //MULTIPLE_HEAPS
#if !defined(MULTIPLE_HEAPS) || !defined(USE_REGIONS)
uint8_t* decommit_target;
#endif //!MULTIPLE_HEAPS || !USE_REGIONS
uint8_t* plan_allocated;
// In the plan phase we change the allocated for a seg but we need this
// value to correctly calculate how much space we can reclaim in
@@ -5897,13 +5896,11 @@ uint8_t*& heap_segment_committed (heap_segment* inst)
{
return inst->committed;
}
#if !defined(MULTIPLE_HEAPS) || !defined(USE_REGIONS)
inline
uint8_t*& heap_segment_decommit_target (heap_segment* inst)
{
return inst->decommit_target;
}
#endif //!MULTIPLE_HEAPS || !USE_REGIONS
inline
uint8_t*& heap_segment_used (heap_segment* inst)
{