Skip to content

Commit

Permalink
osd,os,mon: extend 'ceph df' report to provide both USED and RAW_USED
Browse files Browse the repository at this point in the history
totals, where USED is the space allocated for storing objects, while RAW_USED
also includes space allocated/reserved for internal purposes, e.g.
DB/WAL or journal.

Signed-off-by: Igor Fedotov <ifedotov@suse.com>
  • Loading branch information
ifed01 committed Dec 6, 2018
1 parent 6a01f36 commit 7ca25df
Show file tree
Hide file tree
Showing 10 changed files with 252 additions and 196 deletions.
6 changes: 3 additions & 3 deletions src/mon/MgrStatMonitor.cc
Expand Up @@ -65,11 +65,11 @@ void MgrStatMonitor::update_logger()
{
dout(20) << __func__ << dendl;

mon->cluster_logger->set(l_cluster_osd_bytes, digest.osd_sum.kb * 1024ull);
mon->cluster_logger->set(l_cluster_osd_bytes, digest.osd_sum.statfs.total);
mon->cluster_logger->set(l_cluster_osd_bytes_used,
digest.osd_sum.kb_used * 1024ull);
digest.osd_sum.statfs.get_used_raw());
mon->cluster_logger->set(l_cluster_osd_bytes_avail,
digest.osd_sum.kb_avail * 1024ull);
digest.osd_sum.statfs.available);

mon->cluster_logger->set(l_cluster_num_pool, digest.pg_pool_sum.size());
uint64_t num_pg = 0;
Expand Down
105 changes: 59 additions & 46 deletions src/mon/PGMap.cc
Expand Up @@ -208,18 +208,18 @@ void PGMapDigest::print_summary(Formatter *f, ostream *out) const
f->dump_unsigned("num_pools", pg_pool_sum.size());
f->dump_unsigned("num_objects", pg_sum.stats.sum.num_objects);
f->dump_unsigned("data_bytes", pg_sum.stats.sum.num_bytes);
f->dump_unsigned("bytes_used", osd_sum.kb_used * 1024ull);
f->dump_unsigned("bytes_avail", osd_sum.kb_avail * 1024ull);
f->dump_unsigned("bytes_total", osd_sum.kb * 1024ull);
f->dump_unsigned("bytes_used", osd_sum.statfs.get_used_raw());
f->dump_unsigned("bytes_avail", osd_sum.statfs.available);
f->dump_unsigned("bytes_total", osd_sum.statfs.total);
} else {
*out << " pools: " << pg_pool_sum.size() << " pools, "
<< num_pg << " pgs\n";
*out << " objects: " << si_u_t(pg_sum.stats.sum.num_objects) << " objects, "
<< byte_u_t(pg_sum.stats.sum.num_bytes) << "\n";
*out << " usage: "
<< byte_u_t(osd_sum.kb_used << 10) << " used, "
<< byte_u_t(osd_sum.kb_avail << 10) << " / "
<< byte_u_t(osd_sum.kb << 10) << " avail\n";
<< byte_u_t(osd_sum.statfs.get_used_raw()) << " used, "
<< byte_u_t(osd_sum.statfs.available) << " / "
<< byte_u_t(osd_sum.statfs.total) << " avail\n";
*out << " pgs: ";
}

Expand Down Expand Up @@ -340,15 +340,15 @@ void PGMapDigest::print_oneline_summary(Formatter *f, ostream *out) const
*out << num_pg << " pgs: "
<< states << "; "
<< byte_u_t(pg_sum.stats.sum.num_bytes) << " data, "
<< byte_u_t(osd_sum.kb_used << 10) << " used, "
<< byte_u_t(osd_sum.kb_avail << 10) << " / "
<< byte_u_t(osd_sum.kb << 10) << " avail";
<< byte_u_t(osd_sum.statfs.get_used_raw()) << " used, "
<< byte_u_t(osd_sum.statfs.available) << " / "
<< byte_u_t(osd_sum.statfs.total) << " avail";
if (f) {
f->dump_unsigned("num_pgs", num_pg);
f->dump_unsigned("num_bytes", pg_sum.stats.sum.num_bytes);
f->dump_unsigned("raw_bytes_used", osd_sum.kb_used << 10);
f->dump_unsigned("raw_bytes_avail", osd_sum.kb_avail << 10);
f->dump_unsigned("raw_bytes", osd_sum.kb << 10);
f->dump_unsigned("raw_bytes_used", osd_sum.statfs.get_used_raw());
f->dump_unsigned("raw_bytes_avail", osd_sum.statfs.available);
f->dump_unsigned("raw_bytes", osd_sum.statfs.total);
}

// make non-negative; we can get negative values if osds send
Expand Down Expand Up @@ -740,9 +740,9 @@ ceph_statfs PGMapDigest::get_statfs(OSDMap &osdmap,
statfs.kb = statfs.kb_used + statfs.kb_avail;
} else {
// these are in KB.
statfs.kb = osd_sum.kb;
statfs.kb_used = osd_sum.kb_used;
statfs.kb_avail = osd_sum.kb_avail;
statfs.kb = osd_sum.statfs.kb();
statfs.kb_used = osd_sum.statfs.kb_used_raw();
statfs.kb_avail = osd_sum.statfs.kb_avail();
statfs.num_objects = pg_sum.stats.sum.num_objects;
}

Expand Down Expand Up @@ -848,16 +848,19 @@ void PGMapDigest::dump_pool_stats_full(

void PGMapDigest::dump_fs_stats(stringstream *ss, Formatter *f, bool verbose) const
{
auto total = osd_sum.statfs.total;
auto used_raw = osd_sum.statfs.get_used_raw();
float used = 0.0;
if (osd_sum.kb > 0) {
used = ((float)osd_sum.kb_used / osd_sum.kb);
if (total > 0) {
used = ((float)used_raw / total);
}

if (f) {
f->open_object_section("stats");
f->dump_int("total_bytes", osd_sum.kb * 1024ull);
f->dump_int("total_used_bytes", osd_sum.kb_used * 1024ull);
f->dump_int("total_avail_bytes", osd_sum.kb_avail * 1024ull);
f->dump_int("total_bytes", total);
f->dump_int("total_avail_bytes", osd_sum.statfs.available);
f->dump_int("total_used_bytes", osd_sum.statfs.get_used());
f->dump_int("total_used_raw_bytes", used_raw);
f->dump_float("total_percent_used", used * 100);
if (verbose) {
f->dump_int("total_objects", pg_sum.stats.sum.num_objects);
Expand All @@ -868,14 +871,16 @@ void PGMapDigest::dump_fs_stats(stringstream *ss, Formatter *f, bool verbose) co
TextTable tbl;
tbl.define_column("SIZE", TextTable::LEFT, TextTable::RIGHT);
tbl.define_column("AVAIL", TextTable::LEFT, TextTable::RIGHT);
tbl.define_column("USED", TextTable::LEFT, TextTable::RIGHT);
tbl.define_column("RAW USED", TextTable::LEFT, TextTable::RIGHT);
tbl.define_column("%RAW USED", TextTable::LEFT, TextTable::RIGHT);
if (verbose) {
tbl.define_column("OBJECTS", TextTable::LEFT, TextTable::RIGHT);
}
tbl << stringify(byte_u_t(osd_sum.kb*1024))
<< stringify(byte_u_t(osd_sum.kb_avail*1024))
<< stringify(byte_u_t(osd_sum.kb_used*1024));
tbl << stringify(byte_u_t(total))
<< stringify(byte_u_t(osd_sum.statfs.available))
<< stringify(byte_u_t(osd_sum.statfs.get_used()))
<< stringify(byte_u_t(used_raw));
tbl << percentify(used*100);
if (verbose) {
tbl << stringify(si_u_t(pg_sum.stats.sum.num_objects));
Expand Down Expand Up @@ -913,8 +918,7 @@ void PGMapDigest::dump_object_stat_sum(
}
auto avail_res = raw_used_rate ? avail / raw_used_rate : 0;
// an approximation for actually stored user data
auto stored_normalized =
raw_used_rate ? statfs.data_stored / raw_used_rate : 0;
auto stored_normalized = pool_stat.get_user_bytes(raw_used_rate);
if (f) {
f->dump_int("kb_used", shift_round_up(used_bytes, 10));
f->dump_int("bytes_used", used_bytes);
Expand All @@ -933,7 +937,7 @@ void PGMapDigest::dump_object_stat_sum(
f->dump_int("compress_bytes_used", statfs.data_compressed_allocated);
f->dump_int("compress_under_bytes", statfs.data_compressed_original);
// Stored by user amplified by replication
f->dump_int("stored_raw", statfs.data_stored);
f->dump_int("stored_raw", pool_stat.get_user_bytes(1.0));
}
} else {
tbl << stringify(byte_u_t(statfs.allocated));
Expand Down Expand Up @@ -984,17 +988,17 @@ int64_t PGMap::get_rule_avail(const OSDMap& osdmap, int ruleno) const
for (auto p = wm.begin(); p != wm.end(); ++p) {
auto osd_info = osd_stat.find(p->first);
if (osd_info != osd_stat.end()) {
if (osd_info->second.kb == 0 || p->second == 0) {
if (osd_info->second.statfs.total == 0 || p->second == 0) {
// osd must be out, hence its stats have been zeroed
// (unless we somehow managed to have a disk with size 0...)
//
// (p->second == 0), if osd weight is 0, no need to
// calculate proj below.
continue;
}
double unusable = (double)osd_info->second.kb *
double unusable = (double)osd_info->second.statfs.kb() *
(1.0 - fratio);
double avail = std::max(0.0, (double)osd_info->second.kb_avail - unusable);
double avail = std::max(0.0, (double)osd_info->second.statfs.kb_avail() - unusable);
avail *= 1024.0;
int64_t proj = (int64_t)(avail / (double)p->second);
if (min < 0 || proj < min) {
Expand Down Expand Up @@ -1745,6 +1749,7 @@ void PGMap::dump_osd_stats(ostream& ss) const
tab.define_column("OSD_STAT", TextTable::LEFT, TextTable::LEFT);
tab.define_column("USED", TextTable::LEFT, TextTable::RIGHT);
tab.define_column("AVAIL", TextTable::LEFT, TextTable::RIGHT);
tab.define_column("USED_RAW", TextTable::LEFT, TextTable::RIGHT);
tab.define_column("TOTAL", TextTable::LEFT, TextTable::RIGHT);
tab.define_column("HB_PEERS", TextTable::LEFT, TextTable::RIGHT);
tab.define_column("PG_SUM", TextTable::LEFT, TextTable::RIGHT);
Expand All @@ -1754,19 +1759,21 @@ void PGMap::dump_osd_stats(ostream& ss) const
p != osd_stat.end();
++p) {
tab << p->first
<< byte_u_t(p->second.kb_used << 10)
<< byte_u_t(p->second.kb_avail << 10)
<< byte_u_t(p->second.kb << 10)
<< byte_u_t(p->second.statfs.get_used())
<< byte_u_t(p->second.statfs.available)
<< byte_u_t(p->second.statfs.get_used_raw())
<< byte_u_t(p->second.statfs.total)
<< p->second.hb_peers
<< get_num_pg_by_osd(p->first)
<< get_num_primary_pg_by_osd(p->first)
<< TextTable::endrow;
}

tab << "sum"
<< byte_u_t(osd_sum.kb_used << 10)
<< byte_u_t(osd_sum.kb_avail << 10)
<< byte_u_t(osd_sum.kb << 10)
<< byte_u_t(osd_sum.statfs.get_used())
<< byte_u_t(osd_sum.statfs.available)
<< byte_u_t(osd_sum.statfs.get_used_raw())
<< byte_u_t(osd_sum.statfs.total)
<< TextTable::endrow;

ss << tab;
Expand All @@ -1779,12 +1786,14 @@ void PGMap::dump_osd_sum_stats(ostream& ss) const
tab.define_column("OSD_STAT", TextTable::LEFT, TextTable::LEFT);
tab.define_column("USED", TextTable::LEFT, TextTable::RIGHT);
tab.define_column("AVAIL", TextTable::LEFT, TextTable::RIGHT);
tab.define_column("USED_RAW", TextTable::LEFT, TextTable::RIGHT);
tab.define_column("TOTAL", TextTable::LEFT, TextTable::RIGHT);

tab << "sum"
<< byte_u_t(osd_sum.kb_used << 10)
<< byte_u_t(osd_sum.kb_avail << 10)
<< byte_u_t(osd_sum.kb << 10)
<< byte_u_t(osd_sum.statfs.get_used())
<< byte_u_t(osd_sum.statfs.available)
<< byte_u_t(osd_sum.statfs.get_used_raw())
<< byte_u_t(osd_sum.statfs.total)
<< TextTable::endrow;

ss << tab;
Expand Down Expand Up @@ -3367,7 +3376,7 @@ void PGMapUpdater::check_osd_map(
pending_inc->rm_stat(p.first);
} else if (osdmap.is_out(p.first)) {
// zero osd_stat
if (p.second.kb != 0) {
if (p.second.statfs.total != 0) {
pending_inc->stat_osd_out(p.first);
}
} else if (!osdmap.is_up(p.first)) {
Expand Down Expand Up @@ -3589,20 +3598,22 @@ int reweight::by_utilization(
} else {
// by osd utilization
int num_osd = std::max<size_t>(1, pgm.osd_stat.size());
if ((uint64_t)pgm.osd_sum.kb * 1024 / num_osd
if ((uint64_t)pgm.osd_sum.statfs.total / num_osd
< g_conf()->mon_reweight_min_bytes_per_osd) {
*ss << "Refusing to reweight: we only have " << pgm.osd_sum.kb
*ss << "Refusing to reweight: we only have " << pgm.osd_sum.statfs.kb()
<< " kb across all osds!\n";
return -EDOM;
}
if ((uint64_t)pgm.osd_sum.kb_used * 1024 / num_osd
if ((uint64_t)pgm.osd_sum.statfs.get_used_raw() / num_osd
< g_conf()->mon_reweight_min_bytes_per_osd) {
*ss << "Refusing to reweight: we only have " << pgm.osd_sum.kb_used
*ss << "Refusing to reweight: we only have "
<< pgm.osd_sum.statfs.kb_used_raw()
<< " kb used across all osds!\n";
return -EDOM;
}

average_util = (double)pgm.osd_sum.kb_used / (double)pgm.osd_sum.kb;
average_util = (double)pgm.osd_sum.statfs.get_used_raw() /
(double)pgm.osd_sum.statfs.total;
}

// adjust down only if we are above the threshold
Expand Down Expand Up @@ -3649,9 +3660,11 @@ int reweight::by_utilization(
continue;
}

osd_util.second = pgs_by_osd[p.first] / osdmap.crush->get_item_weightf(p.first);
osd_util.second =
pgs_by_osd[p.first] / osdmap.crush->get_item_weightf(p.first);
} else {
osd_util.second = (double)p.second.kb_used / (double)p.second.kb;
osd_util.second =
(double)p.second.statfs.get_used_raw() / (double)p.second.statfs.total;
}
util_by_osd.push_back(osd_util);
}
Expand Down
32 changes: 20 additions & 12 deletions src/os/bluestore/BlueStore.cc
Expand Up @@ -6666,6 +6666,7 @@ int BlueStore::_fsck(bool deep, bool repair)
// structs
statfs(&actual_statfs);
expected_statfs.total = actual_statfs.total;
expected_statfs.internally_reserved = actual_statfs.internally_reserved;
expected_statfs.available = actual_statfs.available;
expected_statfs.internal_metadata = actual_statfs.internal_metadata;
expected_statfs.omap_allocated = actual_statfs.omap_allocated;
Expand Down Expand Up @@ -7591,7 +7592,7 @@ int BlueStore::get_devices(set<string> *ls)
return 0;
}

int BlueStore::statfs(struct store_statfs_t *buf)
void BlueStore::_get_statfs_overall(struct store_statfs_t *buf)
{
buf->reset();

Expand All @@ -7600,11 +7601,14 @@ int BlueStore::statfs(struct store_statfs_t *buf)
uint64_t bfree = alloc->get_free();

if (bluefs) {
int64_t bluefs_total = bluefs->get_total(bluefs_shared_bdev);
int64_t bluefs_free = bluefs->get_free(bluefs_shared_bdev);
// part of our shared device is "free" according to BlueFS, but we
// can't touch bluestore_bluefs_min of it.
int64_t shared_available = std::min(
bluefs->get_free(bluefs_shared_bdev),
bluefs->get_total(bluefs_shared_bdev) - cct->_conf->bluestore_bluefs_min);
bluefs_free,
int64_t(bluefs_total - cct->_conf->bluestore_bluefs_min));
buf->internally_reserved = bluefs_total - shared_available;
if (shared_available > 0) {
bfree += shared_available;
}
Expand All @@ -7618,15 +7622,6 @@ int BlueStore::statfs(struct store_statfs_t *buf)
- buf->omap_allocated;
}

{
std::lock_guard l(vstatfs_lock);
buf->allocated = vstatfs.allocated();
buf->data_stored = vstatfs.stored();
buf->data_compressed = vstatfs.compressed();
buf->data_compressed_original = vstatfs.compressed_original();
buf->data_compressed_allocated = vstatfs.compressed_allocated();
}

uint64_t thin_total, thin_avail;
if (bdev->get_thin_utilization(&thin_total, &thin_avail)) {
buf->total += thin_total;
Expand All @@ -7640,6 +7635,19 @@ int BlueStore::statfs(struct store_statfs_t *buf)
buf->total += bdev->get_size();
}
buf->available = bfree;
}

int BlueStore::statfs(struct store_statfs_t *buf)
{
_get_statfs_overall(buf);
{
std::lock_guard l(vstatfs_lock);
buf->allocated = vstatfs.allocated();
buf->data_stored = vstatfs.stored();
buf->data_compressed = vstatfs.compressed();
buf->data_compressed_original = vstatfs.compressed_original();
buf->data_compressed_allocated = vstatfs.compressed_allocated();
}

dout(20) << __func__ << " " << *buf << dendl;
return 0;
Expand Down
1 change: 1 addition & 0 deletions src/os/bluestore/BlueStore.h
Expand Up @@ -2176,6 +2176,7 @@ class BlueStore : public ObjectStore,
int _open_super_meta();

void _open_statfs();
void _get_statfs_overall(struct store_statfs_t *buf);

void _dump_alloc_on_rebalance_failure();
int _reconcile_bluefs_freespace();
Expand Down
1 change: 1 addition & 0 deletions src/os/filestore/FileStore.cc
Expand Up @@ -769,6 +769,7 @@ int FileStore::statfs(struct store_statfs_t *buf0)
// Adjust for writes pending in the journal
if (journal) {
uint64_t estimate = journal->get_journal_size_estimate();
buf0->internally_reserved = estimate;
if (buf0->available > estimate)
buf0->available -= estimate;
else
Expand Down
14 changes: 5 additions & 9 deletions src/osd/OSD.cc
Expand Up @@ -858,20 +858,15 @@ void OSDService::set_injectfull(s_names type, int64_t count)
void OSDService::set_statfs(const struct store_statfs_t &stbuf)
{
uint64_t bytes = stbuf.total;
uint64_t used = bytes - stbuf.available;
uint64_t avail = stbuf.available;
uint64_t used = stbuf.get_used_raw();

osd->logger->set(l_osd_stat_bytes, bytes);
osd->logger->set(l_osd_stat_bytes_used, used);
osd->logger->set(l_osd_stat_bytes_avail, avail);

std::lock_guard l(stat_lock);
osd_stat.kb = bytes >> 10;
osd_stat.kb_used = used >> 10;
osd_stat.kb_avail = avail >> 10;
osd_stat.kb_used_data = stbuf.allocated >> 10;
osd_stat.kb_used_omap = stbuf.omap_allocated >> 10;
osd_stat.kb_used_meta = stbuf.internal_metadata >> 10;
osd_stat.statfs = stbuf;
}

osd_stat_t OSDService::set_osd_stat(vector<int>& hb_peers,
Expand Down Expand Up @@ -4949,9 +4944,10 @@ void OSD::heartbeat()

auto new_stat = service.set_osd_stat(hb_peers, get_num_pgs());
dout(5) << __func__ << " " << new_stat << dendl;
ceph_assert(new_stat.kb);
ceph_assert(new_stat.statfs.total);

float ratio = ((float)new_stat.kb_used) / ((float)new_stat.kb);
float ratio =
((float)new_stat.statfs.get_used()) / ((float)new_stat.statfs.total);
service.check_full_status(ratio);

utime_t now = ceph_clock_now();
Expand Down

0 comments on commit 7ca25df

Please sign in to comment.