Skip to content

Commit

Permalink
mon: Add warning if diff in OSD usage > config mon_warn_osd_usage_per…
Browse files Browse the repository at this point in the history
…cent (10%)

Signed-off-by: David Zafman <dzafman@redhat.com>
(cherry picked from commit c8004e6)
  • Loading branch information
dzafman authored and smithfarm committed Jul 18, 2017
1 parent 2c2e0a3 commit bc324e0
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 0 deletions.
1 change: 1 addition & 0 deletions src/common/config_opts.h
Expand Up @@ -305,6 +305,7 @@ OPTION(mon_crush_min_required_version, OPT_STR, "firefly")
OPTION(mon_warn_on_crush_straw_calc_version_zero, OPT_BOOL, true) // warn if crush straw_calc_version==0
OPTION(mon_warn_on_osd_down_out_interval_zero, OPT_BOOL, true) // warn if 'mon_osd_down_out_interval == 0'
OPTION(mon_warn_on_cache_pools_without_hit_sets, OPT_BOOL, true)
OPTION(mon_warn_osd_usage_percent, OPT_FLOAT, .40) // warn if the gap between the highest and lowest per-OSD used-space ratio exceeds this value; given as a fraction (.40 means 40%), 0 disables the check
OPTION(mon_min_osdmap_epochs, OPT_INT, 500)
OPTION(mon_max_pgmap_epochs, OPT_INT, 500)
OPTION(mon_max_log_epochs, OPT_INT, 500)
Expand Down
21 changes: 21 additions & 0 deletions src/mon/PGMonitor.cc
Expand Up @@ -1796,6 +1796,27 @@ void PGMonitor::get_health(list<pair<health_status_t,string> >& summary,
}
}

// Health check: warn when the used-space ratio differs too much between OSDs.
// The threshold is a fraction (0..1); a value of zero switches the check off.
if (g_conf->mon_warn_osd_usage_percent) {
  float lowest_used = 1.0, highest_used = 0.0;
  for (const auto& entry : pg_map.osd_stat) {
    const auto& st = entry.second;
    // A non-positive kb is not expected; only entries with kb > 0 are
    // considered, which also keeps the division below away from zero.
    if (st.kb > 0) {
      // Used ratio of this OSD: (total - available) / total.
      const float used = static_cast<float>(st.kb - st.kb_avail) / static_cast<float>(st.kb);
      highest_used = (used > highest_used) ? used : highest_used;
      lowest_used = (used < lowest_used) ? used : lowest_used;
    }
  }
  // If no entry qualified, the initial values give a negative spread and
  // nothing is reported.
  const float spread = highest_used - lowest_used;
  if (spread > g_conf->mon_warn_osd_usage_percent) {
    ostringstream ss;
    ss << "Difference in osd space utilization " << (spread * 100)
       << "% greater than " << (g_conf->mon_warn_osd_usage_percent * 100) << "%";
    const string msg = ss.str();
    // The same text goes to the summary and, when requested, to the detail list.
    summary.push_back(make_pair(HEALTH_WARN, msg));
    if (detail)
      detail->push_back(make_pair(HEALTH_WARN, msg));
  }
}

// recovery
list<string> sl;
pg_map.overall_recovery_summary(NULL, &sl);
Expand Down

0 comments on commit bc324e0

Please sign in to comment.