Merge 'lsa: background reclaim' from Avi Kivity
Add a background fiber that frees memory using spare cycles, so that
allocations don't have to evict cache synchronously. The fiber's shares are
increased the closer we get to running out of memory, so that it steals
cycles from the workload rather than letting allocations stall.
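
A minimal sketch of the idea, under stated assumptions: memory_pressure()
and reclaim_one_step() are hypothetical stand-ins for the tracker's
internals, and the pressure/shares formula is illustrative; only the
Seastar primitives (keep_doing, with_scheduling_group,
scheduling_group::set_shares, memory::stats()) are real APIs.

#include <seastar/core/loop.hh>
#include <seastar/core/memory.hh>
#include <seastar/core/scheduling.hh>
#include <seastar/core/sleep.hh>
#include <seastar/core/with_scheduling_group.hh>
#include <algorithm>

using namespace std::chrono_literals;

// Hypothetical hooks standing in for the LSA tracker's internals.
bool memory_pressure();   // true while free memory is below the reserve goal
void reclaim_one_step();  // evict or compact a small amount of memory

// Pressure grows from 0 (free memory at twice the goal) to 1 (at the goal).
static float pressure(size_t goal) {
    auto free = seastar::memory::stats().free_memory();
    return std::clamp(2.0f - float(free) / float(goal), 0.0f, 1.0f);
}

// Loops forever in its own scheduling group; shares rise with pressure, so
// the reclaimer competes harder for CPU as memory runs low.
seastar::future<> background_reclaim_fiber(seastar::scheduling_group sg, size_t goal) {
    return seastar::with_scheduling_group(sg, [sg, goal] {
        return seastar::keep_doing([sg, goal] {
            if (!memory_pressure()) {
                return seastar::sleep(10ms); // idle: just poll
            }
            sg.set_shares(1.0f + pressure(goal) * 1000.0f);
            reclaim_one_step();
            return seastar::make_ready_future<>();
        });
    });
}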

The last patch is not strictly related but is a good idea.

See backport notes in the first patch. The others were trivial.

Test: unit (dev)

Ref scylladb#2113
Ref scylladb#2106
Ref scylladb#2071
Ref scylladb#2039

Closes scylladb#2129

* github.com:scylladb/scylla-enterprise:
  lsa: Mark compact_segment_locked() as noexcept
  lsa: Avoid excessive eviction if region is not compactible
  logalloc: fix quadratic behaviour of reclaim_from_evictable
  logalloc: reduce minimum lsa reserve in allocating_section to 1
  main: start background reclaim before bootstrap
  Merge 'lsa: background reclaim' from Avi Kivity
  logalloc: background reclaim
denesb committed Mar 7, 2022
2 parents 9a3370b + b674896 commit b022822
Showing 5 changed files with 267 additions and 46 deletions.
2 changes: 1 addition & 1 deletion configure.py
@@ -1383,7 +1383,7 @@ def configure_seastar(build_dir, mode):
         '-DSeastar_CXX_DIALECT=gnu++20',
         '-DSeastar_API_LEVEL=6',
         '-DSeastar_UNUSED_RESULT_ERROR=ON',
-        '-DSeastar_SCHEDULING_GROUPS_COUNT=17',
+        '-DSeastar_SCHEDULING_GROUPS_COUNT=18',
     ]

     if args.stack_guards is not None:
25 changes: 17 additions & 8 deletions main.cc
@@ -581,7 +581,24 @@ int main(int ac, char** av) {
             return seastar::scheduling_group();
         }
     };
+    auto background_reclaim_scheduling_group = make_sched_group("background_reclaim", 50);
     auto maintenance_scheduling_group = make_sched_group("streaming", 200);
+
+    smp::invoke_on_all([&cfg, background_reclaim_scheduling_group] {
+        logalloc::tracker::config st_cfg;
+        st_cfg.defragment_on_idle = cfg->defragment_memory_on_idle();
+        st_cfg.abort_on_lsa_bad_alloc = cfg->abort_on_lsa_bad_alloc();
+        st_cfg.lsa_reclamation_step = cfg->lsa_reclamation_step();
+        st_cfg.background_reclaim_sched_group = background_reclaim_scheduling_group;
+        logalloc::shard_tracker().configure(st_cfg);
+    }).get();
+
+    auto stop_lsa_background_reclaim = defer([&] {
+        smp::invoke_on_all([&] {
+            return logalloc::shard_tracker().stop();
+        }).get();
+    });
+
     uint16_t api_port = cfg->api_port();
     ctx.api_dir = cfg->api_ui_dir();
     ctx.api_doc = cfg->api_doc_dir();
@@ -1327,14 +1344,6 @@ int main(int ac, char** av) {
         }).get();
     }

-    smp::invoke_on_all([&cfg] {
-        logalloc::tracker::config st_cfg;
-        st_cfg.defragment_on_idle = cfg->defragment_memory_on_idle();
-        st_cfg.abort_on_lsa_bad_alloc = cfg->abort_on_lsa_bad_alloc();
-        st_cfg.lsa_reclamation_step = cfg->lsa_reclamation_step();
-        logalloc::shard_tracker().configure(st_cfg);
-    }).get();
-
     seastar::set_abort_on_ebadf(cfg->abort_on_ebadf());
     api::set_server_done(ctx).get();
     supervisor::notify("serving");
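
The main.cc change pairs per-shard configuration with an immediately
deferred stop, so the reclaim fibers are shut down on every shard even if a
later startup step throws. A minimal sketch of that pattern, with a
hypothetical reclaim_service standing in for logalloc::shard_tracker():

#include <seastar/core/future.hh>
#include <seastar/core/smp.hh>
#include <seastar/util/defer.hh>

// Hypothetical per-shard service standing in for logalloc::shard_tracker().
struct reclaim_service {
    void configure() { /* apply per-shard settings */ }
    seastar::future<> stop() { return seastar::make_ready_future<>(); }
};
static thread_local reclaim_service the_reclaimer; // one instance per shard
static reclaim_service& local_reclaimer() { return the_reclaimer; }

// Must run inside a seastar::thread (as main() does), so .get() may block.
// The returned deferred action stops the service on all shards when the
// caller's scope unwinds, at normal shutdown or on a startup exception.
auto start_background_reclaim() {
    seastar::smp::invoke_on_all([] {
        local_reclaimer().configure();
    }).get();
    return seastar::defer([] {
        seastar::smp::invoke_on_all([] {
            return local_reclaimer().stop();
        }).get();
    });
}
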
81 changes: 81 additions & 0 deletions test/boost/logalloc_test.cc
@@ -29,6 +29,7 @@
#include "utils/managed_ref.hh"
#include "utils/managed_bytes.hh"
#include "test/lib/log.hh"
#include "test/lib/random_utils.hh"
#include "log.hh"

[[gnu::unused]]
@@ -1450,4 +1451,84 @@ SEASTAR_THREAD_TEST_CASE(test_decay_reserves) {
     BOOST_REQUIRE_LE(reclaims, expected_reclaims);
 }

+SEASTAR_THREAD_TEST_CASE(background_reclaim) {
+    prime_segment_pool(memory::stats().total_memory(), memory::min_free_memory()).get(); // if previous test cases muddied the pool
+
+    region evictable;
+    std::vector<managed_bytes> evictable_allocs;
+
+    auto& rnd = seastar::testing::local_random_engine;
+
+    auto clean_up = defer([&] {
+        with_allocator(evictable.allocator(), [&] {
+            evictable_allocs.clear();
+        });
+    });
+
+
+    // Fill up memory with allocations
+    size_t lsa_alloc_size = 300;
+
+    while (true) {
+        try {
+            with_allocator(evictable.allocator(), [&] {
+                evictable_allocs.push_back(managed_bytes(managed_bytes::initialized_later(), lsa_alloc_size));
+            });
+        } catch (std::bad_alloc&) {
+            break;
+        }
+    }
+
+    // make the reclaimer work harder
+    std::shuffle(evictable_allocs.begin(), evictable_allocs.end(), rnd);
+
+    evictable.make_evictable([&] () -> memory::reclaiming_result {
+        if (evictable_allocs.empty()) {
+            return memory::reclaiming_result::reclaimed_nothing;
+        }
+        with_allocator(evictable.allocator(), [&] {
+            evictable_allocs.pop_back();
+        });
+        return memory::reclaiming_result::reclaimed_something;
+    });
+
+    // Set up the background reclaimer
+
+    auto background_reclaim_scheduling_group = create_scheduling_group("background_reclaim", 100).get0();
+    auto kill_sched_group = defer([&] {
+        destroy_scheduling_group(background_reclaim_scheduling_group).get();
+    });
+
+    logalloc::tracker::config st_cfg;
+    st_cfg.defragment_on_idle = false;
+    st_cfg.abort_on_lsa_bad_alloc = false;
+    st_cfg.lsa_reclamation_step = 1;
+    st_cfg.background_reclaim_sched_group = background_reclaim_scheduling_group;
+    logalloc::shard_tracker().configure(st_cfg);
+
+    auto stop_lsa_background_reclaim = defer([&] {
+        return logalloc::shard_tracker().stop().get();
+    });
+
+    sleep(500ms).get(); // sleep a little, to give the reclaimer a head start
+
+    std::vector<managed_bytes> std_allocs;
+    size_t std_alloc_size = 1000000; // note that managed_bytes fragments these, even in std
+    for (int i = 0; i < 50; ++i) {
+        auto compacted_pre = logalloc::memory_compacted();
+        fmt::print("compacted {} items {} (pre)\n", compacted_pre, evictable_allocs.size());
+        std_allocs.emplace_back(managed_bytes::initialized_later(), std_alloc_size);
+        auto compacted_post = logalloc::memory_compacted();
+        fmt::print("compacted {} items {} (post)\n", compacted_post, evictable_allocs.size());
+        BOOST_REQUIRE_EQUAL(compacted_pre, compacted_post);
+
+        // Pretend to do some work. Sleeping would be too easy, as the background reclaim group would use
+        // all that time.
+        auto deadline = std::chrono::steady_clock::now() + 100ms;
+        while (std::chrono::steady_clock::now() < deadline) {
+            thread::maybe_yield();
+        }
+    }
+}
+
 #endif