HPA: Make purging/hugifying more principled.
Before this change, purge/hugify decisions had several sharp edges that could
lead to pathological behavior if tuning parameters weren't carefully chosen.
This commit is the first of a series; it introduces basic "make every hugepage
with dirty pages purgeable" functionality, and the next commit builds on it
with a smarter policy for picking hugepages to purge.

Previously, the dehugify logic would *never* dehugify a hugepage unless it was
dirtier than the dehugification threshold.  This could lead to situations in
which those hugepages' dirty pages (which themselves could never be purged)
pushed the shard above its maximum allowed dirty pages.  That forced immediate
purging of any pages deallocated in non-hugified hugepages, which in turn
placed nonobvious practical limitations on the relationships between the
various config settings.

Instead, we make our preference not to purge (and thereby dehugify) hugified
hugepages a soft one rather than a hard one.  We'll avoid purging them so long
as we can satisfy our needs by purging non-hugified pages.  If we need to purge
them to satisfy our dirty page limits, or to hugify other, more worthy
candidates, we'll still do so.
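To make that preference soft, purge candidates are kept in one list per purge
level (see the to_purge array added to psset_s below), and selection always
drains the most-preferred nonempty list first.  The scan itself lives in
psset_pick_purge(), in the collapsed portion of this diff; the following is a
minimal sketch of the idea, not the actual implementation, and assumes
jemalloc's generated typed-list helper hpdata_purge_list_first():

#include "jemalloc/internal/psset.h"

/* Sketch only; illustrates the leveled soft preference. */
static hpdata_t *
psset_pick_purge_sketch(psset_t *psset) {
	/* Most-preferred (default) first, strongly nonpreferred last. */
	for (int i = 0; i < (int)hpdata_purge_level_count; i++) {
		hpdata_t *ps = hpdata_purge_list_first(&psset->to_purge[i]);
		if (ps != NULL) {
			return ps;
		}
	}
	/* Level-"never" hpdatas are never kept in any purge list. */
	return NULL;
}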
davidtgoldblatt committed Feb 19, 2021
1 parent 6bddb92 commit 0f6c420
Showing 5 changed files with 183 additions and 75 deletions.
76 changes: 59 additions & 17 deletions include/jemalloc/internal/hpdata.h
@@ -6,6 +6,42 @@
#include "jemalloc/internal/ql.h"
#include "jemalloc/internal/typed_list.h"

/*
* How badly we want to purge some region of memory. This is a temporary
* definition; it gets deleted in the next commit (where we adopt a more
* explicit dirtiest-first policy that only considers hugification status).
*/
enum hpdata_purge_level_e {
/*
* The level number is important -- we use it as an index into an array
* of size 3 (one for each purgeable level).
*/

/* "Regular" candidates for purging. */
hpdata_purge_level_default = 0,

/*
* Candidates for purging, but as a last resort. Practically,
* nonpreferred corresponds to hugified regions that are below the
* hugification threshold but have not yet reached the dehugification
* threshold, while strongly nonpreferred candidates are those which are
* above the hugification threshold.
*/
hpdata_purge_level_nonpreferred = 1,
hpdata_purge_level_strongly_nonpreferred = 2,

/* Don't purge, no matter what. */
hpdata_purge_level_never = 3,

/*
* How big an array has to be to accommodate all purge levels. This
* relies on the fact that we don't actually keep unpurgable hpdatas in
* a container.
*/
hpdata_purge_level_count = hpdata_purge_level_never
};
typedef enum hpdata_purge_level_e hpdata_purge_level_t;

/*
* The metadata representation we use for extents in hugepages. While the PAC
* uses the edata_t to represent both active and inactive extents, the HP only
@@ -52,8 +88,8 @@ struct hpdata_s {
bool h_in_psset_alloc_container;

/* The same, but with purging. */
bool h_purge_allowed;
bool h_in_psset_purge_container;
uint8_t h_purge_level;
uint8_t h_purge_container_level;

/* And with hugifying. */
bool h_hugify_allowed;
@@ -164,26 +200,26 @@ hpdata_in_psset_alloc_container_set(hpdata_t *hpdata, bool in_container) {
hpdata->h_in_psset_alloc_container = in_container;
}

static inline bool
hpdata_purge_allowed_get(const hpdata_t *hpdata) {
return hpdata->h_purge_allowed;
static inline hpdata_purge_level_t
hpdata_purge_level_get(const hpdata_t *hpdata) {
return (hpdata_purge_level_t)hpdata->h_purge_level;
}

static inline void
hpdata_purge_allowed_set(hpdata_t *hpdata, bool purge_allowed) {
assert(purge_allowed == false || !hpdata->h_mid_purge);
hpdata->h_purge_allowed = purge_allowed;
hpdata_purge_level_set(hpdata_t *hpdata, hpdata_purge_level_t level) {
assert(level == hpdata_purge_level_never || !hpdata->h_mid_purge);
hpdata->h_purge_level = (uint8_t)level;
}

static inline bool
hpdata_in_psset_purge_container_get(const hpdata_t *hpdata) {
return hpdata->h_in_psset_purge_container;
static inline hpdata_purge_level_t
hpdata_purge_container_level_get(const hpdata_t *hpdata) {
return (hpdata_purge_level_t)hpdata->h_purge_container_level;
}

static inline void
hpdata_in_psset_purge_container_set(hpdata_t *hpdata, bool in_container) {
assert(in_container != hpdata->h_in_psset_purge_container);
hpdata->h_in_psset_purge_container = in_container;
hpdata_purge_container_level_set(hpdata_t *hpdata, hpdata_purge_level_t level) {
assert(level != hpdata->h_purge_container_level);
hpdata->h_purge_container_level = (uint8_t)level;
}

static inline bool
@@ -284,6 +320,11 @@ hpdata_ndirty_get(hpdata_t *hpdata) {
return hpdata->h_ntouched - hpdata->h_nactive;
}

static inline size_t
hpdata_nretained_get(hpdata_t *hpdata) {
return HUGEPAGE_PAGES - hpdata->h_ntouched;
}

static inline void
hpdata_assert_empty(hpdata_t *hpdata) {
assert(fb_empty(hpdata->active_pages, HUGEPAGE_PAGES));
@@ -316,11 +357,12 @@ hpdata_consistent(hpdata_t *hpdata) {
return false;
}
if (hpdata_changing_state_get(hpdata)
&& (hpdata->h_purge_allowed || hpdata->h_hugify_allowed)) {
&& ((hpdata->h_purge_level != hpdata_purge_level_never)
|| hpdata->h_hugify_allowed)) {
return false;
}
if (hpdata_purge_allowed_get(hpdata)
!= hpdata_in_psset_purge_container_get(hpdata)) {
if (hpdata_purge_level_get(hpdata)
!= hpdata_purge_container_level_get(hpdata)) {
return false;
}
if (hpdata_hugify_allowed_get(hpdata)
4 changes: 2 additions & 2 deletions include/jemalloc/internal/psset.h
@@ -70,8 +70,8 @@ struct psset_s {
* allocations.
*/
hpdata_empty_list_t empty;
/* Slabs which are available to be purged. */
hpdata_purge_list_t to_purge;
/* Slabs which are available to be purged, ordered by purge level. */
hpdata_purge_list_t to_purge[hpdata_purge_level_count];
/* Slabs which are available to be hugified. */
hpdata_hugify_list_t to_hugify;
};
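The fifth changed file (presumably src/psset.c, not expanded in this view)
must keep each hpdata in the to_purge list matching its purge level, and move
it whenever that level changes.  A sketch of that bookkeeping, under the
assumption that the typed-list helpers hpdata_purge_list_append() and
hpdata_purge_list_remove() exist as in the rest of the psset code:

#include "jemalloc/internal/psset.h"

/* Sketch only; names are patterned on the existing container helpers. */
static void
psset_maybe_remove_purge_list(psset_t *psset, hpdata_t *ps) {
	hpdata_purge_level_t level = hpdata_purge_container_level_get(ps);
	if (level != hpdata_purge_level_never) {
		/* Drop the hpdata from the list it currently occupies. */
		hpdata_purge_list_remove(&psset->to_purge[level], ps);
		hpdata_purge_container_level_set(ps, hpdata_purge_level_never);
	}
}

static void
psset_maybe_insert_purge_list(psset_t *psset, hpdata_t *ps) {
	hpdata_purge_level_t level = hpdata_purge_level_get(ps);
	if (level != hpdata_purge_level_never) {
		hpdata_purge_list_append(&psset->to_purge[level], ps);
		/* Record which list it went into, for later removal. */
		hpdata_purge_container_level_set(ps, level);
	}
}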
105 changes: 70 additions & 35 deletions src/hpa.c
@@ -151,34 +151,59 @@ hpa_good_hugification_candidate(hpa_shard_t *shard, hpdata_t *ps) {
>= shard->opts.hugification_threshold;
}

static bool
hpa_should_purge(hpa_shard_t *shard) {
static size_t
hpa_adjusted_ndirty(tsdn_t *tsdn, hpa_shard_t *shard) {
malloc_mutex_assert_owner(tsdn, &shard->mtx);
return psset_ndirty(&shard->psset) - shard->npending_purge;
}

static size_t
hpa_ndirty_max(tsdn_t *tsdn, hpa_shard_t *shard) {
malloc_mutex_assert_owner(tsdn, &shard->mtx);
if (shard->opts.dirty_mult == (fxp_t)-1) {
return false;
return (size_t)-1;
}
size_t adjusted_ndirty = psset_ndirty(&shard->psset)
- shard->npending_purge;
/*
* Purge whenever the adjusted dirty page count exceeds dirty_mult (by
* default, 25%) of active pages.
*/
size_t max_ndirty = fxp_mul_frac(psset_nactive(&shard->psset),
return fxp_mul_frac(psset_nactive(&shard->psset),
shard->opts.dirty_mult);
return adjusted_ndirty > max_ndirty;
}

static bool
hpa_hugify_blocked_by_ndirty(tsdn_t *tsdn, hpa_shard_t *shard) {
malloc_mutex_assert_owner(tsdn, &shard->mtx);
hpdata_t *to_hugify = psset_pick_hugify(&shard->psset);
if (to_hugify == NULL) {
return false;
}
return hpa_adjusted_ndirty(tsdn, shard)
+ hpdata_nretained_get(to_hugify) > hpa_ndirty_max(tsdn, shard);
}

static bool
hpa_should_purge(tsdn_t *tsdn, hpa_shard_t *shard) {
malloc_mutex_assert_owner(tsdn, &shard->mtx);
if (hpa_adjusted_ndirty(tsdn, shard) > hpa_ndirty_max(tsdn, shard)) {
return true;
}
if (hpa_hugify_blocked_by_ndirty(tsdn, shard)) {
return true;
}
return false;
}

static void
hpa_update_purge_hugify_eligibility(hpa_shard_t *shard, hpdata_t *ps) {
hpa_update_purge_hugify_eligibility(tsdn_t *tsdn, hpa_shard_t *shard,
hpdata_t *ps) {
malloc_mutex_assert_owner(tsdn, &shard->mtx);
if (hpdata_changing_state_get(ps)) {
hpdata_purge_allowed_set(ps, false);
hpdata_purge_level_set(ps, hpdata_purge_level_never);
hpdata_hugify_allowed_set(ps, false);
return;
}
/*
* Hugepages are distinctly costly to purge, so do it only if they're
* *particularly* full of dirty pages. Eventually, we should use a
* smarter / more dynamic heuristic for situations where we have to
* manually hugify.
* Hugepages are distinctly costly to purge, so try to avoid it unless
* they're *particularly* full of dirty pages. Eventually, we should
* use a smarter / more dynamic heuristic for situations where we have
* to manually hugify.
*
* In situations where we don't manually hugify, this problem is
* reduced. The "bad" situation we're trying to avoid is one that's
@@ -195,17 +220,23 @@ hpa_update_purge_hugify_eligibility(hpa_shard_t *shard, hpdata_t *ps) {
* deferred; in that case we don't need any explicit calls on the
* allocator's end at all; we just try to pack allocations in a
* hugepage-friendly manner and let the OS hugify in the background.
*
* Anyways, our strategy to delay dehugification is to only consider
* purging a hugified hugepage if it's individually dirtier than the
* overall max dirty pages setting. That setting is 1 dirty page per 4
* active pages; i.e. 4/5s of hugepage pages must be active.
*/
if ((!hpdata_huge_get(ps) && hpdata_ndirty_get(ps) > 0)
|| (hpdata_ndirty_get(ps) != 0
&& hpdata_ndirty_get(ps) * PAGE
>= shard->opts.dehugification_threshold)) {
hpdata_purge_allowed_set(ps, true);
if (hpdata_ndirty_get(ps) > 0) {
if (hpdata_huge_get(ps)) {
if (hpa_good_hugification_candidate(shard, ps)) {
hpdata_purge_level_set(ps,
hpdata_purge_level_strongly_nonpreferred);
} else if (hpdata_ndirty_get(ps) * PAGE
>= shard->opts.dehugification_threshold) {
hpdata_purge_level_set(ps,
hpdata_purge_level_default);
} else {
hpdata_purge_level_set(ps,
hpdata_purge_level_nonpreferred);
}
} else {
hpdata_purge_level_set(ps, hpdata_purge_level_default);
}
}
if (hpa_good_hugification_candidate(shard, ps)
&& !hpdata_huge_get(ps)) {
@@ -286,7 +317,7 @@ hpa_try_purge(tsdn_t *tsdn, hpa_shard_t *shard) {
if (to_purge == NULL) {
return false;
}
assert(hpdata_purge_allowed_get(to_purge));
assert(hpdata_purge_level_get(to_purge) != hpdata_purge_level_never);
assert(!hpdata_changing_state_get(to_purge));

/*
@@ -297,7 +328,7 @@ hpa_try_purge(tsdn_t *tsdn, hpa_shard_t *shard) {
psset_update_begin(&shard->psset, to_purge);
assert(hpdata_alloc_allowed_get(to_purge));
hpdata_mid_purge_set(to_purge, true);
hpdata_purge_allowed_set(to_purge, false);
hpdata_purge_level_set(to_purge, hpdata_purge_level_never);
hpdata_hugify_allowed_set(to_purge, false);
/*
* Unlike with hugification (where concurrent
@@ -352,7 +383,7 @@ hpa_try_purge(tsdn_t *tsdn, hpa_shard_t *shard) {
hpdata_mid_purge_set(to_purge, false);

hpdata_alloc_allowed_set(to_purge, true);
hpa_update_purge_hugify_eligibility(shard, to_purge);
hpa_update_purge_hugify_eligibility(tsdn, shard, to_purge);

psset_update_end(&shard->psset, to_purge);

@@ -364,6 +395,10 @@ static bool
hpa_try_hugify(tsdn_t *tsdn, hpa_shard_t *shard) {
malloc_mutex_assert_owner(tsdn, &shard->mtx);

if (hpa_hugify_blocked_by_ndirty(tsdn, shard)) {
return false;
}

hpdata_t *to_hugify = psset_pick_hugify(&shard->psset);
if (to_hugify == NULL) {
return false;
@@ -378,7 +413,7 @@ hpa_try_hugify(tsdn_t *tsdn, hpa_shard_t *shard) {
*/
psset_update_begin(&shard->psset, to_hugify);
hpdata_mid_hugify_set(to_hugify, true);
hpdata_purge_allowed_set(to_hugify, false);
hpdata_purge_level_set(to_hugify, hpdata_purge_level_never);
hpdata_hugify_allowed_set(to_hugify, false);
assert(hpdata_alloc_allowed_get(to_hugify));
psset_update_end(&shard->psset, to_hugify);
@@ -401,7 +436,7 @@ hpa_try_hugify(tsdn_t *tsdn, hpa_shard_t *shard) {
psset_update_begin(&shard->psset, to_hugify);
hpdata_hugify(to_hugify);
hpdata_mid_hugify_set(to_hugify, false);
hpa_update_purge_hugify_eligibility(shard, to_hugify);
hpa_update_purge_hugify_eligibility(tsdn, shard, to_hugify);
psset_update_end(&shard->psset, to_hugify);

return true;
@@ -419,7 +454,7 @@ hpa_do_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard) {
hugified = hpa_try_hugify(tsdn, shard);

purged = false;
if (hpa_should_purge(shard)) {
if (hpa_should_purge(tsdn, shard)) {
purged = hpa_try_purge(tsdn, shard);
}
malloc_mutex_assert_owner(tsdn, &shard->mtx);
@@ -491,7 +526,7 @@ hpa_try_alloc_one_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size,
return NULL;
}

hpa_update_purge_hugify_eligibility(shard, ps);
hpa_update_purge_hugify_eligibility(tsdn, shard, ps);
psset_update_end(&shard->psset, ps);
return edata;
}
@@ -703,7 +738,7 @@ hpa_dalloc_locked(tsdn_t *tsdn, hpa_shard_t *shard, edata_t *edata) {

psset_update_begin(&shard->psset, ps);
hpdata_unreserve(ps, unreserve_addr, unreserve_size);
hpa_update_purge_hugify_eligibility(shard, ps);
hpa_update_purge_hugify_eligibility(tsdn, shard, ps);
psset_update_end(&shard->psset, ps);
hpa_do_deferred_work(tsdn, shard);
}
4 changes: 2 additions & 2 deletions src/hpdata.c
@@ -24,8 +24,8 @@ hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age) {
hpdata->h_huge = false;
hpdata->h_alloc_allowed = true;
hpdata->h_in_psset_alloc_container = false;
hpdata->h_purge_allowed = false;
hpdata->h_in_psset_purge_container = false;
hpdata->h_purge_level = hpdata_purge_level_never;
hpdata->h_purge_container_level = hpdata_purge_level_never;
hpdata->h_hugify_allowed = false;
hpdata->h_in_psset_hugify_container = false;
hpdata->h_mid_purge = false;
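A note on the new hugify gating above: hugifying an hpdata commits to touching
all of its retained (never-touched) pages, so hpa_try_hugify() now defers via
hpa_hugify_blocked_by_ndirty() whenever doing so would exceed the dirty
budget, and hpa_should_purge() reports true so purging can make room first.  A
worked example with invented numbers (4 KiB pages, 2 MiB hugepages,
dirty_mult = 0.25):

#include <stdbool.h>
#include <stdio.h>

/* Invented numbers, mirroring hpa_hugify_blocked_by_ndirty()'s check. */
int
main(void) {
	unsigned hugepage_pages = 512;     /* 2 MiB hugepage / 4 KiB pages */
	unsigned nactive = 10000;          /* shard-wide active pages */
	unsigned adjusted_ndirty = 2400;   /* ndirty minus pending purges */
	unsigned ndirty_max = nactive / 4; /* dirty_mult = 0.25 -> 2500 */
	unsigned ntouched = 300;           /* on the hugify candidate */
	unsigned nretained = hugepage_pages - ntouched; /* 212 */

	/*
	 * 2400 + 212 = 2612 > 2500, so hugification is deferred until
	 * purging brings the dirty count back under budget.
	 */
	bool blocked = adjusted_ndirty + nretained > ndirty_max;
	printf("hugify blocked by ndirty: %s\n", blocked ? "yes" : "no");
	return 0;
}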
