Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

shell: improve subcommand app_stat and nodes #277

Merged
merged 5 commits into from
Feb 19, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 0 additions & 3 deletions src/server/info_collector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,6 @@ void info_collector::on_app_stat()
all.storage_count += row.storage_count;
all.rdb_block_cache_hit_count += row.rdb_block_cache_hit_count;
all.rdb_block_cache_total_count += row.rdb_block_cache_total_count;
all.rdb_block_cache_mem_usage += row.rdb_block_cache_mem_usage;
all.rdb_index_and_filter_blocks_mem_usage += row.rdb_index_and_filter_blocks_mem_usage;
all.rdb_memtable_mem_usage += row.rdb_memtable_mem_usage;
read_qps[i] = row.get_qps + row.multi_get_qps + row.scan_qps;
Expand Down Expand Up @@ -139,7 +138,6 @@ void info_collector::on_app_stat()
std::abs(row.rdb_block_cache_total_count) < 1e-6
? 0
: row.rdb_block_cache_hit_count / row.rdb_block_cache_total_count * 1000000);
counters->rdb_block_cache_mem_usage->set(row.rdb_block_cache_mem_usage);
counters->rdb_index_and_filter_blocks_mem_usage->set(
row.rdb_index_and_filter_blocks_mem_usage);
counters->rdb_memtable_mem_usage->set(row.rdb_memtable_mem_usage);
Expand Down Expand Up @@ -192,7 +190,6 @@ info_collector::AppStatCounters *info_collector::get_app_counters(const std::str
INIT_COUNTER(storage_mb);
INIT_COUNTER(storage_count);
INIT_COUNTER(rdb_block_cache_hit_rate);
INIT_COUNTER(rdb_block_cache_mem_usage);
INIT_COUNTER(rdb_index_and_filter_blocks_mem_usage);
INIT_COUNTER(rdb_memtable_mem_usage);
INIT_COUNTER(read_qps);
Expand Down
3 changes: 0 additions & 3 deletions src/shell/command_helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -485,7 +485,6 @@ struct row_data
double storage_count = 0;
double rdb_block_cache_hit_count = 0;
double rdb_block_cache_total_count = 0;
double rdb_block_cache_mem_usage = 0;
double rdb_index_and_filter_blocks_mem_usage = 0;
double rdb_memtable_mem_usage = 0;
};
Expand Down Expand Up @@ -531,8 +530,6 @@ update_app_pegasus_perf_counter(row_data &row, const std::string &counter_name,
row.rdb_block_cache_hit_count += value;
else if (counter_name == "rdb.block_cache.total_count")
row.rdb_block_cache_total_count += value;
else if (counter_name == "rdb.block_cache.memory_usage")
row.rdb_block_cache_mem_usage += value;
else if (counter_name == "rdb.index_and_filter_blocks.memory_usage")
row.rdb_index_and_filter_blocks_mem_usage += value;
else if (counter_name == "rdb.memtable.memory_usage")
Expand Down
229 changes: 219 additions & 10 deletions src/shell/commands.h
Original file line number Diff line number Diff line change
Expand Up @@ -164,27 +164,63 @@ inline bool ls_apps(command_executor *e, shell_context *sc, arguments args)
return true;
}

/// One output row of the `nodes` shell command, filled in by ls_nodes()
/// before the table is printed.
///
/// Every numeric field starts at zero via its default member initializer, so
/// a row for a node that reports no replicas or no perf counters still prints
/// well-defined values. The constructor only sets the two identifying strings.
struct list_nodes_helper
{
    /// Display name of the node: its address string, or the resolved host
    /// name when ls_nodes() is run with --resolve_ip.
    std::string node_name;
    /// Node status text as produced by ls_nodes() (enum name with the "NS_"
    /// prefix stripped).
    std::string node_status;
    /// Number of partitions for which this node is the primary.
    int primary_count = 0;
    /// Number of partitions for which this node is a secondary.
    int secondary_count = 0;
    /// Value of the "replica*server*memused.res(MB)" perf counter.
    int64_t memused_res_mb = 0;
    /// Value of the "replica*app.pegasus*rdb.block_cache.memory_usage" perf
    /// counter; ls_nodes() divides it by 2^20 for the "block_cache_mb" column.
    int64_t block_cache_bytes = 0;
    /// Sum over partitions of "rdb.memtable.memory_usage"; printed divided by
    /// 2^20 in the "mem_tbl_mb" column.
    int64_t mem_tbl_bytes = 0;
    /// Sum over partitions of "rdb.index_and_filter_blocks.memory_usage";
    /// printed divided by 2^20 in the "mem_idx_mb" column.
    int64_t mem_idx_bytes = 0;
    /// Value of the "replica*eon.replica_stub*disk.available.total.ratio"
    /// perf counter.
    int64_t disk_available_total_ratio = 0;
    /// Value of the "replica*eon.replica_stub*disk.available.min.ratio"
    /// perf counter.
    int64_t disk_available_min_ratio = 0;

    /// @param n display name of the node
    /// @param s status text of the node
    list_nodes_helper(const std::string &n, const std::string &s) : node_name(n), node_status(s)
    {
    }
};
inline bool ls_nodes(command_executor *e, shell_context *sc, arguments args)
{
static struct option long_options[] = {{"detailed", no_argument, 0, 'd'},
{"resolve_ip", no_argument, 0, 'r'},
{"resource_usage", no_argument, 0, 'u'},
{"status", required_argument, 0, 's'},
{"output", required_argument, 0, 'o'},
{0, 0, 0, 0}};

std::string status;
std::string output_file;
bool detailed = false;
bool resolve_ip = false;
bool resource_usage = false;
optind = 0;
while (true) {
int option_index = 0;
int c;
c = getopt_long(args.argc, args.argv, "ds:o:", long_options, &option_index);
c = getopt_long(args.argc, args.argv, "drus:o:", long_options, &option_index);
if (c == -1)
break;
switch (c) {
case 'd':
detailed = true;
break;
case 'r':
resolve_ip = true;
break;
case 'u':
resource_usage = true;
break;
case 's':
status = optarg;
break;
Expand Down Expand Up @@ -217,9 +253,184 @@ inline bool ls_nodes(command_executor *e, shell_context *sc, arguments args)
status.c_str());
}

::dsn::error_code err = sc->ddl_client->list_nodes(s, detailed, output_file);
if (err != ::dsn::ERR_OK)
std::cout << "list nodes failed, error=" << err.to_string() << std::endl;
std::map<dsn::rpc_address, dsn::replication::node_status::type> nodes;
auto r = sc->ddl_client->list_nodes(s, nodes);
if (r != dsn::ERR_OK) {
std::cout << "list nodes failed, error=" << r.to_string() << std::endl;
return true;
}

std::map<dsn::rpc_address, list_nodes_helper> tmp_map;
int alive_node_count = 0;
for (auto &kv : nodes) {
if (kv.second == dsn::replication::node_status::NS_ALIVE)
alive_node_count++;
std::string status_str = dsn::enum_to_string(kv.second);
status_str = status_str.substr(status_str.find("NS_") + 3);
std::string node_name = kv.first.to_std_string();
if (resolve_ip) {
// TODO: put hostname_from_ip_port into common utils
node_name = sc->ddl_client->hostname_from_ip_port(node_name.c_str());
}
tmp_map.emplace(kv.first, list_nodes_helper(node_name, status_str));
}

if (detailed) {
std::vector<::dsn::app_info> apps;
r = sc->ddl_client->list_apps(dsn::app_status::AS_AVAILABLE, apps);
if (r != dsn::ERR_OK) {
std::cout << "list apps failed, error=" << r.to_string() << std::endl;
return true;
}

for (auto &app : apps) {
int32_t app_id;
int32_t partition_count;
std::vector<dsn::partition_configuration> partitions;
r = sc->ddl_client->list_app(app.app_name, app_id, partition_count, partitions);
if (r != dsn::ERR_OK) {
std::cout << "list app " << app.app_name << " failed, error=" << r.to_string()
<< std::endl;
return true;
}

for (const dsn::partition_configuration &p : partitions) {
if (!p.primary.is_invalid()) {
auto find = tmp_map.find(p.primary);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

find改下名字? 一是跟函数find重名,二是不知道啥内容

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

还有几个地方用的find

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

这个问题不大,我一直都是这么写的

if (find != tmp_map.end()) {
find->second.primary_count++;
}
}
for (const dsn::rpc_address &addr : p.secondaries) {
auto find = tmp_map.find(addr);
if (find != tmp_map.end()) {
find->second.secondary_count++;
}
}
}
}
}

if (resource_usage) {
std::vector<node_desc> nodes;
if (!fill_nodes(sc, "replica-server", nodes)) {
derror("get replica server node list failed");
return true;
}

::dsn::command command;
command.cmd = "perf-counters";
command.arguments.push_back(".*memused.res(MB)");
command.arguments.push_back(".*rdb.block_cache.memory_usage");
command.arguments.push_back(".*disk.available.total.ratio");
command.arguments.push_back(".*disk.available.min.ratio");
command.arguments.push_back(".*@.*");
std::vector<std::pair<bool, std::string>> results;
call_remote_command(sc, nodes, command, results);

for (int i = 0; i < nodes.size(); ++i) {
dsn::rpc_address node_addr = nodes[i].address;
auto tmp_it = tmp_map.find(node_addr);
if (tmp_it == tmp_map.end())
continue;
if (!results[i].first) {
derror("query perf counter info from node %s failed", node_addr.to_string());
return true;
}
dsn::perf_counter_info info;
dsn::blob bb(results[i].second.data(), 0, results[i].second.size());
if (!dsn::json::json_forwarder<dsn::perf_counter_info>::decode(bb, info)) {
derror("decode perf counter info from node %s failed, result = %s",
node_addr.to_string(),
results[i].second.c_str());
return true;
}
if (info.result != "OK") {
derror("query perf counter info from node %s returns error, error = %s",
node_addr.to_string(),
info.result.c_str());
return true;
}
list_nodes_helper &h = tmp_it->second;
for (dsn::perf_counter_metric &m : info.counters) {
if (m.name == "replica*server*memused.res(MB)")
h.memused_res_mb = m.value;
else if (m.name == "replica*app.pegasus*rdb.block_cache.memory_usage")
h.block_cache_bytes = m.value;
else if (m.name == "replica*eon.replica_stub*disk.available.total.ratio")
h.disk_available_total_ratio = m.value;
else if (m.name == "replica*eon.replica_stub*disk.available.min.ratio")
h.disk_available_min_ratio = m.value;
else {
int32_t app_id_x, partition_index_x;
std::string counter_name;
bool parse_ret = parse_app_pegasus_perf_counter_name(
m.name, app_id_x, partition_index_x, counter_name);
dassert(parse_ret, "name = %s", m.name.c_str());
if (counter_name == "rdb.memtable.memory_usage")
h.mem_tbl_bytes += m.value;
else if (counter_name == "rdb.index_and_filter_blocks.memory_usage")
h.mem_idx_bytes += m.value;
}
}
}
}

// print configuration_list_nodes_response
std::streambuf *buf;
std::ofstream of;

if (!output_file.empty()) {
of.open(output_file);
buf = of.rdbuf();
} else {
buf = std::cout.rdbuf();
}
std::ostream out(buf);

dsn::utils::table_printer tp;
tp.add_title("address");
tp.add_column("status");
if (detailed) {
tp.add_column("replica_count", tp_alignment::kRight);
tp.add_column("primary_count", tp_alignment::kRight);
tp.add_column("secondary_count", tp_alignment::kRight);
}
if (resource_usage) {
tp.add_column("memused_res_mb", tp_alignment::kRight);
tp.add_column("block_cache_mb", tp_alignment::kRight);
tp.add_column("mem_tbl_mb", tp_alignment::kRight);
tp.add_column("mem_idx_mb", tp_alignment::kRight);
tp.add_column("disk_avl_total_ratio", tp_alignment::kRight);
tp.add_column("disk_avl_min_ratio", tp_alignment::kRight);
}
for (auto &kv : tmp_map) {
tp.add_row(kv.second.node_name);
tp.append_data(kv.second.node_status);
if (detailed) {
tp.append_data(kv.second.primary_count + kv.second.secondary_count);
tp.append_data(kv.second.primary_count);
tp.append_data(kv.second.secondary_count);
}
if (resource_usage) {
tp.append_data(kv.second.memused_res_mb);
tp.append_data(kv.second.block_cache_bytes / (1 << 20U));
tp.append_data(kv.second.mem_tbl_bytes / (1 << 20U));
tp.append_data(kv.second.mem_idx_bytes / (1 << 20U));
tp.append_data(kv.second.disk_available_total_ratio);
tp.append_data(kv.second.disk_available_min_ratio);
}
}
tp.output(out);
out << std::endl;

dsn::utils::table_printer tp_count;
tp_count.add_row_name_and_data("total_node_count", nodes.size());
tp_count.add_row_name_and_data("alive_node_count", alive_node_count);
tp_count.add_row_name_and_data("unalive_node_count", nodes.size() - alive_node_count);
tp_count.output(out, ": ");
out << std::endl;

return true;
}

Expand Down Expand Up @@ -3772,7 +3983,6 @@ inline bool app_stat(command_executor *e, shell_context *sc, arguments args)
sum.storage_count += row.storage_count;
sum.rdb_block_cache_hit_count += row.rdb_block_cache_hit_count;
sum.rdb_block_cache_total_count += row.rdb_block_cache_total_count;
sum.rdb_block_cache_mem_usage += row.rdb_block_cache_mem_usage;
sum.rdb_index_and_filter_blocks_mem_usage += row.rdb_index_and_filter_blocks_mem_usage;
sum.rdb_memtable_mem_usage += row.rdb_memtable_mem_usage;
}
Expand Down Expand Up @@ -3808,8 +4018,9 @@ inline bool app_stat(command_executor *e, shell_context *sc, arguments args)
tp.add_column("rejected", tp_alignment::kRight);
tp.add_column("file_mb", tp_alignment::kRight);
tp.add_column("file_num", tp_alignment::kRight);
tp.add_column("mem_tbl_mb", tp_alignment::kRight);
tp.add_column("mem_idx_mb", tp_alignment::kRight);
tp.add_column("hit_rate", tp_alignment::kRight);
tp.add_column("rdb_mem_mb", tp_alignment::kRight);
}

for (row_data &row : rows) {
Expand All @@ -3832,15 +4043,13 @@ inline bool app_stat(command_executor *e, shell_context *sc, arguments args)
tp.append_data(row.recent_write_throttling_reject_count);
tp.append_data(row.storage_mb);
tp.append_data((uint64_t)row.storage_count);
tp.append_data(row.rdb_memtable_mem_usage / (1 << 20U));
tp.append_data(row.rdb_index_and_filter_blocks_mem_usage / (1 << 20U));
double block_cache_hit_rate =
std::abs(row.rdb_block_cache_total_count) < 1e-6
? 0.0
: row.rdb_block_cache_hit_count / row.rdb_block_cache_total_count;
tp.append_data(block_cache_hit_rate);
tp.append_data((row.rdb_block_cache_mem_usage +
row.rdb_index_and_filter_blocks_mem_usage +
row.rdb_memtable_mem_usage) /
(1 << 20U));
}
}
tp.output(out);
Expand Down
3 changes: 2 additions & 1 deletion src/shell/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,8 @@ static command_executor commands[] = {
{
"nodes",
"get the node status for this cluster",
"[-d|--detailed] [-o|--output file_name] [-s|--status all|alive|unalive]",
"[-d|--detailed] [-r|--resolve_ip] [-u|--resource_usage] "
"[-o|--output file_name] [-s|--status all|alive|unalive]",
ls_nodes,
},
{
Expand Down