Skip to content

Commit

Permalink
Merge remote-tracking branch 'couchbase/trinity'
Browse files Browse the repository at this point in the history
* MB-61751: Add counters for [graceful] failover status

Change-Id: I6f37d6f95abc1c7f8af403ca71a2dd009180ada5
  • Loading branch information
stevewatanabe committed May 6, 2024
2 parents 2264902 + 4eb065d commit 4c0d46c
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 10 deletions.
2 changes: 2 additions & 0 deletions apps/ns_server/src/ns_orchestrator.erl
Expand Up @@ -1703,6 +1703,8 @@ rebalance_type2text(service_upgrade) ->
<<"Service upgrade">>.

update_rebalance_counters(Reason, #rebalancing_state{type = Type}) ->
%% If a new counter is added here, a corresponding convert_to_reported_event/1
%% clause must be added to ns_server_stats.erl.
Counter =
case Reason of
normal ->
Expand Down
36 changes: 27 additions & 9 deletions apps/ns_server/src/ns_server_stats.erl
Expand Up @@ -403,23 +403,41 @@ report_ns_server_hc_stats(ReportFun) ->
%% Translate the trailing part of a cluster counter name (the text that
%% follows a prefix such as "rebalance_") into the value reported in the
%% "event" label of the corresponding stat.
convert_to_reported_event(<<"start">>) -> <<"initiated">>;
convert_to_reported_event(<<"success">>) -> <<"completed">>;
convert_to_reported_event(<<"fail">>) -> <<"failed">>;
convert_to_reported_event(<<"interrupted">>) -> <<"interrupted">>;
convert_to_reported_event(<<"stop">>) -> <<"stopped">>;
%% NOTE(review): two catch-all clauses appear in this listing (the
%% pass-through one directly below and the 'skip' one at the end). This looks
%% like the old and new versions shown together - confirm which is current.
convert_to_reported_event(Other) -> Other.
%% Only the orchestrator counters, whose names end in one of the suffixes
%% listed above, should be reported. The counters generated by the failover
%% module also start with 'failover_' (and include graceful failovers).
%% Fortunately the trailing portions of the 'failover_' counter names don't
%% overlap between the two modules, so any unrecognized suffix is mapped to
%% 'skip' and is not reported.
convert_to_reported_event(_) -> skip.

%% Report cluster-wide stats (stored in chronicle).
%%
%% Iterates over the {Key, Val} pairs returned by ns_cluster:counters() and,
%% for each counter whose name carries a recognized prefix, calls ReportFun
%% with a {<<"cm">>, StatName, Labels, Val} tuple, where Labels holds a single
%% <<"event">> label produced by convert_to_reported_event/1.
report_cluster_stats(ReportFun) ->
Counters = ns_cluster:counters(),
lists:foreach(
fun ({Key, Val}) ->
%% presumably yields the counter name as a binary so it can be matched
%% by prefix below - verify against key_to_binary
KeyBin = key_to_binary(Key),
%% NOTE(review): this case expression and the `{Event, StatName} = ...`
%% binding after it both dispatch on KeyBin; this looks like the old and
%% new versions of the body shown together - confirm which is current.
case KeyBin of
<<"rebalance_", Event/binary>> ->
Label = [{<<"event">>, convert_to_reported_event(Event)}],
ReportFun({<<"cm">>, <<"rebalance_total">>, Label, Val});
_ ->
ok
end
%% Map the counter name onto the reported event and the stat name it is
%% reported under; names without a recognized prefix yield 'skip'.
{Event, StatName} =
case KeyBin of
<<"rebalance_", Event0/binary>> ->
{convert_to_reported_event(Event0),
<<"rebalance_total">>};
<<"failover_", Event0/binary>> ->
{convert_to_reported_event(Event0),
<<"failover_total">>};
<<"graceful_failover_", Event0/binary>> ->
{convert_to_reported_event(Event0),
<<"graceful_failover_total">>};
_ ->
{skip, undefined}
end,
case Event of
%% 'skip' comes either from the unrecognized-prefix branch above or from
%% convert_to_reported_event/1; nothing is reported for such counters.
skip ->
ok;
_ ->
Label = [{<<"event">>, Event}],
ReportFun({<<"cm">>, StatName, Label, Val})
end
end, Counters).

%% Delete stats for the specified bucket.
Expand Down
24 changes: 23 additions & 1 deletion etc/metrics_metadata.json
Expand Up @@ -117,12 +117,34 @@
}
]
},
"cm_failover_total": {
"type": "counter",
"help": "Number of non-graceful failover results",
"added": "7.6.2",
"labels": [
{
"name": "event",
"help": "failover result (initiated/completed/failed/stopped)"
}
]
},
"cm_gc_duration_seconds": {
"type": "histogram",
"help": "Time to perform erlang garbage collection",
"added": "7.6.0",
"stability": "committed"
},
"cm_graceful_failover_total": {
"type": "counter",
"help": "Number of graceful failover results",
"added": "7.6.2",
"labels": [
{
"name": "event",
"help": "graceful failover result (initiated/completed/failed/stopped)"
}
]
},
"cm_http_requests_seconds": {
"type": "histogram",
"help": "Number of bucket HTTP requests",
Expand Down Expand Up @@ -462,7 +484,7 @@
"labels": [
{
"name": "event",
"help": "rebalance result (initiated/completed/failed/interrupted/stopped)"
"help": "rebalance result (initiated/completed/failed/stopped)"
}
]
},
Expand Down

0 comments on commit 4c0d46c

Please sign in to comment.