From 025b10a5329127734367a6899543f51cd8580d43 Mon Sep 17 00:00:00 2001
From: David Zafman
Date: Wed, 10 Jul 2019 18:15:44 +0000
Subject: [PATCH] osd: Add "dump_osd_network" osd admin request to get a sorted report

Signed-off-by: David Zafman
---
 src/osd/OSD.cc | 78 ++++++++++++++++++++++++++++++++++++++++++++++++++
 src/osd/OSD.h  |  6 ++++
 2 files changed, 84 insertions(+)

diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc
index a8d8a677b6144..6363493013235 100644
--- a/src/osd/OSD.cc
+++ b/src/osd/OSD.cc
@@ -24,6 +24,7 @@
 #include <signal.h>
 #include <time.h>
 #include <boost/scoped_ptr.hpp>
+#include <boost/range/adaptor/reversed.hpp>
 
 #ifdef HAVE_SYS_PARAM_H
 #include <sys/param.h>
@@ -2548,6 +2549,79 @@ will start to track new ops received afterwards.";
     if (is_active()) {
       send_beacon(ceph::coarse_mono_clock::now());
     }
+  } else if (admin_command == "dump_osd_network") {
+    // Report heartbeat ping times at or above a threshold, slowest first.
+    // The threshold is the optional "value" argument; without it the
+    // mon_warn_on_slow_ping_time setting is used.
+    int64_t value = 0;
+    if (!(cmd_getval(cct, cmdmap, "value", value))) {
+      value = static_cast<int64_t>(g_conf().get_val<uint64_t>("mon_warn_on_slow_ping_time"));
+    }
+    if (value < 0) value = 0;
+
+    // One interface (back or front) of one heartbeat peer.
+    struct osd_ping_time_t {
+      uint32_t pingtime;  // largest of the three times; primary sort key
+      int to;  // reported as "to osd"
+      bool back;  // true: back interface, false: front interface
+      std::array<uint32_t,3> times;  // reported as 1min, 5min, 15min
+
+      // Order by pingtime, then by osd, then back before front.  The
+      // final step has to look at rhs as well: returning plain "back"
+      // would make an entry compare less than itself, which std::set's
+      // strict weak ordering requirement does not allow.
+      bool operator<(const osd_ping_time_t& rhs) const {
+        if (pingtime != rhs.pingtime)
+          return pingtime < rhs.pingtime;
+        if (to != rhs.to)
+          return to < rhs.to;
+        return back && !rhs.back;
+      }
+    };
+
+    set<osd_ping_time_t> sorted;
+    // Add one interface of one peer when its largest time reaches the
+    // threshold.
+    auto add_if_slow = [&sorted, value](int to, bool back, const auto &avg) {
+      osd_ping_time_t item;
+      item.pingtime = std::max(avg[0], avg[1]);
+      item.pingtime = std::max(item.pingtime, avg[2]);
+      if (item.pingtime < value)
+        return;
+      item.to = to;
+      item.back = back;
+      item.times[0] = avg[0];
+      item.times[1] = avg[1];
+      item.times[2] = avg[2];
+      sorted.emplace(item);
+    };
+
+    // get_hb_pingtime() copies the map while holding stat_lock.  A local
+    // map is enough here: std::map keeps its nodes on the heap, and the
+    // copy is released on every exit path without a manual delete.
+    map<int, osd_stat_t::Interfaces> pingtimes;
+    service.get_hb_pingtime(&pingtimes);
+    for (const auto &j : pingtimes) {
+      add_if_slow(j.first, true, j.second.back_pingtime);
+      // A zero 1min front time means no front entry is reported.
+      if (j.second.front_pingtime[0] != 0)
+        add_if_slow(j.first, false, j.second.front_pingtime);
+    }
+
+    // Network ping times (1min 5min 15min)
+    f->open_array_section("network_ping_times");
+    for (const auto &sitem : boost::adaptors::reverse(sorted)) {
+      ceph_assert(sitem.pingtime >= value);
+      f->open_object_section("entry");
+      f->dump_int("from osd", whoami);
+      f->dump_int("to osd", sitem.to);
+      f->dump_string("interface", (sitem.back ? "back" : "front"));
+      f->dump_int("1min", sitem.times[0]);
+      f->dump_int("5min", sitem.times[1]);
+      f->dump_int("15min", sitem.times[2]);
+      f->close_section(); // entry
+    }
+    f->close_section(); // network_ping_times
   } else {
     ceph_abort_msg("broken asok registration");
   }
@@ -3253,6 +3327,10 @@ void OSD::final_init()
                                      asok_hook,
                                      "send OSD beacon to mon immediately");
 
+  r = admin_socket->register_command("dump_osd_network", "dump_osd_network name=value,type=CephInt,req=false", asok_hook,
+                                     "Dump osd heartbeat network ping times");
+  ceph_assert(r == 0);
+
   test_ops_hook = new TestOpsSocketHook(&(this->service), this->store);
   // Note: pools are CephString instead of CephPoolname because
   // these commands traditionally support both pool names and numbers
diff --git a/src/osd/OSD.h b/src/osd/OSD.h
index c6513ec792fc8..f9dbcfd59e61a 100644
--- a/src/osd/OSD.h
+++ b/src/osd/OSD.h
@@ -760,6 +760,12 @@ class OSDService {
     std::lock_guard l(stat_lock);
     return osd_stat.seq;
   }
+  /// Copy the heartbeat ping times into *pp while holding stat_lock.
+  void get_hb_pingtime(map<int, osd_stat_t::Interfaces> *pp)
+  {
+    std::lock_guard l(stat_lock);
+    *pp = osd_stat.hb_pingtime;
+  }
 
   // -- OSD Full Status --
 private: