From 598228168e3a0911338759a6ca6222df7ab2d34f Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Sat, 23 Mar 2019 04:44:15 -0500 Subject: [PATCH 1/3] osd/osd_types,mon: add pg_autoscale_bias pool property Signed-off-by: Sage Weil (cherry picked from commit f845fa17af0c83e1c09b2086b53176354c2bce3f) --- src/mon/MonCommands.h | 4 ++-- src/mon/OSDMonitor.cc | 11 ++++++++++- src/osd/osd_types.cc | 4 +++- src/osd/osd_types.h | 1 + 4 files changed, 16 insertions(+), 4 deletions(-) diff --git a/src/mon/MonCommands.h b/src/mon/MonCommands.h index fdbee9dde1689..b89e98c34da2d 100644 --- a/src/mon/MonCommands.h +++ b/src/mon/MonCommands.h @@ -992,11 +992,11 @@ COMMAND("osd pool rename " \ "rename to ", "osd", "rw") COMMAND("osd pool get " \ "name=pool,type=CephPoolname " \ - "name=var,type=CephChoices,strings=size|min_size|pg_num|pgp_num|crush_rule|hashpspool|nodelete|nopgchange|nosizechange|write_fadvise_dontneed|noscrub|nodeep-scrub|hit_set_type|hit_set_period|hit_set_count|hit_set_fpp|use_gmt_hitset|target_max_objects|target_max_bytes|cache_target_dirty_ratio|cache_target_dirty_high_ratio|cache_target_full_ratio|cache_min_flush_age|cache_min_evict_age|erasure_code_profile|min_read_recency_for_promote|all|min_write_recency_for_promote|fast_read|hit_set_grade_decay_rate|hit_set_search_last_n|scrub_min_interval|scrub_max_interval|deep_scrub_interval|recovery_priority|recovery_op_priority|scrub_priority|compression_mode|compression_algorithm|compression_required_ratio|compression_max_blob_size|compression_min_blob_size|csum_type|csum_min_block|csum_max_block|allow_ec_overwrites|fingerprint_algorithm|pg_autoscale_mode|pg_num_min|target_size_bytes|target_size_ratio", \ + "name=var,type=CephChoices,strings=size|min_size|pg_num|pgp_num|crush_rule|hashpspool|nodelete|nopgchange|nosizechange|write_fadvise_dontneed|noscrub|nodeep-scrub|hit_set_type|hit_set_period|hit_set_count|hit_set_fpp|use_gmt_hitset|target_max_objects|target_max_bytes|cache_target_dirty_ratio|cache_target_dirty_high_ratio|cache_target_full_ratio|cache_min_flush_age|cache_min_evict_age|erasure_code_profile|min_read_recency_for_promote|all|min_write_recency_for_promote|fast_read|hit_set_grade_decay_rate|hit_set_search_last_n|scrub_min_interval|scrub_max_interval|deep_scrub_interval|recovery_priority|recovery_op_priority|scrub_priority|compression_mode|compression_algorithm|compression_required_ratio|compression_max_blob_size|compression_min_blob_size|csum_type|csum_min_block|csum_max_block|allow_ec_overwrites|fingerprint_algorithm|pg_autoscale_mode|pg_autoscale_bias|pg_num_min|target_size_bytes|target_size_ratio", \ "get pool parameter ", "osd", "r") COMMAND("osd pool set " \ "name=pool,type=CephPoolname " \ - "name=var,type=CephChoices,strings=size|min_size|pg_num|pgp_num|pgp_num_actual|crush_rule|hashpspool|nodelete|nopgchange|nosizechange|write_fadvise_dontneed|noscrub|nodeep-scrub|hit_set_type|hit_set_period|hit_set_count|hit_set_fpp|use_gmt_hitset|target_max_bytes|target_max_objects|cache_target_dirty_ratio|cache_target_dirty_high_ratio|cache_target_full_ratio|cache_min_flush_age|cache_min_evict_age|min_read_recency_for_promote|min_write_recency_for_promote|fast_read|hit_set_grade_decay_rate|hit_set_search_last_n|scrub_min_interval|scrub_max_interval|deep_scrub_interval|recovery_priority|recovery_op_priority|scrub_priority|compression_mode|compression_algorithm|compression_required_ratio|compression_max_blob_size|compression_min_blob_size|csum_type|csum_min_block|csum_max_block|allow_ec_overwrites|fingerprint_algorithm|pg_autoscale_mode|pg_num_min|target_size_bytes|target_size_ratio " \ + "name=var,type=CephChoices,strings=size|min_size|pg_num|pgp_num|pgp_num_actual|crush_rule|hashpspool|nodelete|nopgchange|nosizechange|write_fadvise_dontneed|noscrub|nodeep-scrub|hit_set_type|hit_set_period|hit_set_count|hit_set_fpp|use_gmt_hitset|target_max_bytes|target_max_objects|cache_target_dirty_ratio|cache_target_dirty_high_ratio|cache_target_full_ratio|cache_min_flush_age|cache_min_evict_age|min_read_recency_for_promote|min_write_recency_for_promote|fast_read|hit_set_grade_decay_rate|hit_set_search_last_n|scrub_min_interval|scrub_max_interval|deep_scrub_interval|recovery_priority|recovery_op_priority|scrub_priority|compression_mode|compression_algorithm|compression_required_ratio|compression_max_blob_size|compression_min_blob_size|csum_type|csum_min_block|csum_max_block|allow_ec_overwrites|fingerprint_algorithm|pg_autoscale_mode|pg_autoscale_bias|pg_num_min|target_size_bytes|target_size_ratio " \ "name=val,type=CephString " \ "name=yes_i_really_mean_it,type=CephBool,req=false", \ "set pool parameter to ", "osd", "rw") diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc index 5e995d4bc857e..c115b5c335cdb 100644 --- a/src/mon/OSDMonitor.cc +++ b/src/mon/OSDMonitor.cc @@ -4553,7 +4553,8 @@ namespace { COMPRESSION_MODE, COMPRESSION_ALGORITHM, COMPRESSION_REQUIRED_RATIO, COMPRESSION_MAX_BLOB_SIZE, COMPRESSION_MIN_BLOB_SIZE, CSUM_TYPE, CSUM_MAX_BLOCK, CSUM_MIN_BLOCK, FINGERPRINT_ALGORITHM, - PG_AUTOSCALE_MODE, PG_NUM_MIN, TARGET_SIZE_BYTES, TARGET_SIZE_RATIO }; + PG_AUTOSCALE_MODE, PG_NUM_MIN, TARGET_SIZE_BYTES, TARGET_SIZE_RATIO, + PG_AUTOSCALE_BIAS }; std::set subtract_second_from_first(const std::set& first, @@ -5253,6 +5254,7 @@ bool OSDMonitor::preprocess_command(MonOpRequestRef op) {"pg_num_min", PG_NUM_MIN}, {"target_size_bytes", TARGET_SIZE_BYTES}, {"target_size_ratio", TARGET_SIZE_RATIO}, + {"pg_autoscale_bias", PG_AUTOSCALE_BIAS}, }; typedef std::set choices_set_t; @@ -5468,6 +5470,7 @@ bool OSDMonitor::preprocess_command(MonOpRequestRef op) case PG_NUM_MIN: case TARGET_SIZE_BYTES: case TARGET_SIZE_RATIO: + case PG_AUTOSCALE_BIAS: pool_opts_t::key_t key = pool_opts_t::get_opt_desc(i->first).key; if (p->opts.is_set(key)) { if(*it == CSUM_TYPE) { @@ -5624,6 +5627,7 @@ bool OSDMonitor::preprocess_command(MonOpRequestRef op) case PG_NUM_MIN: case TARGET_SIZE_BYTES: case TARGET_SIZE_RATIO: + case PG_AUTOSCALE_BIAS: for (i = ALL_CHOICES.begin(); i != ALL_CHOICES.end(); ++i) { if (i->second == *it) break; @@ -7618,6 +7622,11 @@ int OSDMonitor::prepare_command_pool_set(const cmdmap_t& cmdmap, << "Ceph internal implementation restrictions"; return -EINVAL; } + } else if (var == "pg_autoscale_bias") { + if (f < 0.0 || f > 1000.0) { + ss << "pg_autoscale_bias must be between 0 and 1000"; + return -EINVAL; + } } pool_opts_t::opt_desc_t desc = pool_opts_t::get_opt_desc(var); diff --git a/src/osd/osd_types.cc b/src/osd/osd_types.cc index af3f0d70a01c3..5c4407b596cfb 100644 --- a/src/osd/osd_types.cc +++ b/src/osd/osd_types.cc @@ -1158,7 +1158,9 @@ static opt_mapping_t opt_mapping = boost::assign::map_list_of ("target_size_bytes", pool_opts_t::opt_desc_t( pool_opts_t::TARGET_SIZE_BYTES, pool_opts_t::INT)) ("target_size_ratio", pool_opts_t::opt_desc_t( - pool_opts_t::TARGET_SIZE_RATIO, pool_opts_t::DOUBLE)); + pool_opts_t::TARGET_SIZE_RATIO, pool_opts_t::DOUBLE)) + ("pg_autoscale_bias", pool_opts_t::opt_desc_t( + pool_opts_t::PG_AUTOSCALE_BIAS, pool_opts_t::DOUBLE)); bool pool_opts_t::is_opt_name(const std::string& name) { diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h index c5df4d389566a..c5fa0ca7dc38e 100644 --- a/src/osd/osd_types.h +++ b/src/osd/osd_types.h @@ -1019,6 +1019,7 @@ class pool_opts_t { PG_NUM_MIN, // min pg_num TARGET_SIZE_BYTES, // total bytes in pool TARGET_SIZE_RATIO, // fraction of total cluster + PG_AUTOSCALE_BIAS, }; enum type_t { From 51090e139e8052742d77d92bd3993d0423f0d58c Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 25 Mar 2019 06:29:53 -0500 Subject: [PATCH 2/3] mgr/pg_autoscaler: include pg_autoscale_bias in autoscale-status table Signed-off-by: Sage Weil (cherry picked from commit f1d3be6ac7a9e5092a5a610ac3f5a59c74e25bc7) --- src/pybind/mgr/pg_autoscaler/module.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/pybind/mgr/pg_autoscaler/module.py b/src/pybind/mgr/pg_autoscaler/module.py index e92377eb4bdbb..2a1cf978c4afa 100644 --- a/src/pybind/mgr/pg_autoscaler/module.py +++ b/src/pybind/mgr/pg_autoscaler/module.py @@ -109,6 +109,7 @@ def _command_autoscale_status(self, cmd): table = PrettyTable(['POOL', 'SIZE', 'TARGET SIZE', 'RATE', 'RAW CAPACITY', 'RATIO', 'TARGET RATIO', + 'BIAS', 'PG_NUM', # 'IDEAL', 'NEW PG_NUM', 'AUTOSCALE'], @@ -120,6 +121,7 @@ def _command_autoscale_status(self, cmd): table.align['RAW CAPACITY'] = 'r' table.align['RATIO'] = 'r' table.align['TARGET RATIO'] = 'r' + table.align['BIAS'] = 'r' table.align['PG_NUM'] = 'r' # table.align['IDEAL'] = 'r' table.align['NEW PG_NUM'] = 'r' @@ -145,6 +147,7 @@ def _command_autoscale_status(self, cmd): mgr_util.format_bytes(p['subtree_capacity'], 6), '%.4f' % p['capacity_ratio'], tr, + p['bias'], p['pg_num_target'], # p['pg_num_ideal'], final, @@ -318,6 +321,7 @@ def _get_pool_status( 'pg_num_ideal': int(pool_pg_target), 'pg_num_final': final_pg_target, 'would_adjust': adjust, + 'bias': p.get('options', {}).get('pg_autoscale_bias', 1.0), }); return (ret, root_map, pool_root) From 791dd8c302cca70e43e54becc383f6bd5c4a7196 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 25 Mar 2019 06:39:28 -0500 Subject: [PATCH 3/3] mgr/pg_autoscaler: apply bias to pg_num selection This is a relatively naive way to apply the bias: we just multiply it to whatever we would have chosen. A more clever approach would be to factor this into the overall cluster-wide PG budget, so that biasing one pool's PGs up would put downward pressure on other pools. That is significantly more complicated, however, and (I think) not worth the effort. Signed-off-by: Sage Weil (cherry picked from commit e7ad0eeaaa3a73b5b78764d86eb634ca7072afd1) --- src/pybind/mgr/pg_autoscaler/module.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/pybind/mgr/pg_autoscaler/module.py b/src/pybind/mgr/pg_autoscaler/module.py index 2a1cf978c4afa..2c9c6dbe3231c 100644 --- a/src/pybind/mgr/pg_autoscaler/module.py +++ b/src/pybind/mgr/pg_autoscaler/module.py @@ -268,6 +268,7 @@ def _get_pool_status( raw_used_rate = osdmap.pool_raw_used_rate(pool_id) pool_logical_used = pool_stats[pool_id]['bytes_used'] + bias = p['options'].get('pg_autoscale_bias', 1.0) target_bytes = p['options'].get('target_size_bytes', 0) # What proportion of space are we using? @@ -281,16 +282,17 @@ def _get_pool_status( final_ratio = max(capacity_ratio, target_ratio) # So what proportion of pg allowance should we be using? - pool_pg_target = (final_ratio * root_map[root_id].pg_target) / raw_used_rate + pool_pg_target = (final_ratio * root_map[root_id].pg_target) / raw_used_rate * bias final_pg_target = max(p['options'].get('pg_num_min', PG_NUM_MIN), nearest_power_of_two(pool_pg_target)) - self.log.info("Pool '{0}' root_id {1} using {2} of space, " - "pg target {3} quantized to {4} (current {5})".format( + self.log.info("Pool '{0}' root_id {1} using {2} of space, bias {3}, " + "pg target {4} quantized to {5} (current {6})".format( p['pool_name'], root_id, final_ratio, + bias, pool_pg_target, final_pg_target, p['pg_num_target']