/
derived_metrics.erl
184 lines (172 loc) · 8.03 KB
/
derived_metrics.erl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
%% @author Couchbase <info@couchbase.com>
%% @copyright 2021-Present Couchbase, Inc.
%%
%% Use of this software is governed by the Business Source License included in
%% the file licenses/BSL-Couchbase.txt. As of the Change Date specified in that
%% file, in accordance with the Business Source License, use of this software
%% will be governed by the Apache License, Version 2.0, included in the file
%% licenses/APL2.txt.
%%
-module(derived_metrics).
-export([is_metric/1, get_metric/2]).
-include("cut.hrl").
%% Tell whether Name is a derived metric known to this module.
%% A name is known exactly when get_metric/1 has a definition for it; unknown
%% names come back from get_metric/1 as the empty list.
is_metric(Name) ->
    case get_metric(Name) of
        [] -> false;
        _Definition -> true
    end.
%% Look up a single property of the derived metric Name.
%% The definitions built in this file carry the keys `aggregation_fun' and
%% `query'. Because the lookup goes through proplists:get_value/2, a missing
%% key (which includes every key of an unknown metric, whose definition is [])
%% yields `undefined'.
get_metric(Name, Key) ->
    Definition = get_metric(Name),
    proplists:get_value(Key, Definition).
%% Definition table for the derived metrics served by this module.
%%
%% Each clause maps a metric name (a binary) to the proplist produced by one
%% of the helpers in this file (ratio/2,3, percent/3, opposite_percent/2,
%% sum/1, sum_across_labels/2); those helpers return a list of the form
%% [{aggregation_fun, Fun}, {query, Fun}]. A name without a clause of its own
%% reaches the final catch-all clause and yields [], which is the value
%% is_metric/1 checks for.
%%
%% The third argument of ratio/3 and percent/3 is the value reported when the
%% summed denominator is the integer 0 (see aggregated_ratio/3); ratio/2 passes
%% `undefined' and opposite_percent/2 passes 100.
%%
%% NOTE(review): `_(X)' and `?cut(...)' depend on the cut parse transform
%% pulled in through cut.hrl, which is not visible from this file. They are
%% presumably shorthand for a one-argument fun in which the argument takes the
%% place of `_' (e.g. `fun (M) -> M(X) end') -- confirm against cut.hrl.

%% n1ql_* averages: each total divided by the number of requests.
get_metric(<<"n1ql_avg_req_time">>) ->
ratio(_(<<"n1ql_request_time">>), _(<<"n1ql_requests">>));
get_metric(<<"n1ql_avg_svc_time">>) ->
ratio(_(<<"n1ql_service_time">>), _(<<"n1ql_requests">>));
get_metric(<<"n1ql_avg_response_size">>) ->
ratio(_(<<"n1ql_result_size">>), _(<<"n1ql_requests">>));
get_metric(<<"n1ql_avg_result_count">>) ->
ratio(_(<<"n1ql_result_count">>), _(<<"n1ql_requests">>));
%% index_* metrics.
get_metric(<<"index_ram_percent">>) ->
percent(_(<<"index_memory_used_total">>), _(<<"index_memory_quota">>), 100);
%% Quota minus used memory, passed through promQL:clamp_min(_, 0); no labels
%% are listed for removal.
get_metric(<<"index_remaining_ram">>) ->
sum_across_labels(
fun (M) ->
Diff = promQL:op('-', [M(<<"index_memory_quota">>),
M(<<"index_memory_used_total">>)]),
promQL:clamp_min(Diff, 0)
end, []);
get_metric(<<"index_num_docs_pending_and_queued">>) ->
sum([<<"index_num_docs_pending">>, <<"index_num_docs_queued">>]);
%% Misses as a percentage of misses + hits; 0 when that total is 0.
get_metric(<<"index_cache_miss_ratio">>) ->
percent(_(<<"index_cache_misses">>),
fun (M) ->
promQL:op('+', [M(<<"index_cache_misses">>),
M(<<"index_cache_hits">>)])
end, 0);
%% Both sides drop the index/collection/scope labels; the numerator is
%% disk size multiplied by frag percent, the denominator is disk size alone.
get_metric(<<"index_fragmentation">>) ->
ratio(
fun (M) ->
promQL:sum_without([<<"index">>, <<"collection">>, <<"scope">>],
promQL:op('*', [M(<<"index_disk_size">>),
M(<<"index_frag_percent">>)]))
end,
fun (M) ->
promQL:sum_without([<<"index">>, <<"collection">>, <<"scope">>],
M(<<"index_disk_size">>))
end, 100);
%% Resident percent multiplied by data size, divided by data size.
get_metric(<<"index_resident_percent">>) ->
ratio(fun (M) ->
promQL:op('*', [M(<<"index_resident_percent">>),
M(<<"index_data_size">>)])
end,
_(<<"index_data_size">>), 100);
%% couch_* metrics.
get_metric(<<"couch_total_disk_size">>) ->
sum([<<"couch_docs_actual_disk_size">>,
<<"couch_views_actual_disk_size">>]);
get_metric(<<"couch_docs_fragmentation">>) ->
opposite_percent(_(<<"kv_ep_db_data_size_bytes">>),
_(<<"kv_ep_db_file_size_bytes">>));
get_metric(<<"couch_views_fragmentation">>) ->
opposite_percent(_(<<"couch_views_data_size">>),
_(<<"couch_views_disk_size">>));
%% kv_* metrics. Several of them select from the `kv_ops' metric by its
%% `op' / `result' labels and then drop those labels with sum_without.
get_metric(<<"kv_hit_ratio">>) ->
percent(?cut(promQL:sum_without([<<"result">>, <<"op">>],
_({[{eq, <<"name">>, <<"kv_ops">>},
{eq, <<"op">>, <<"get">>},
{eq, <<"result">>, <<"hit">>}]}))),
?cut(promQL:sum_without([<<"result">>, <<"op">>],
_({[{eq, <<"name">>, <<"kv_ops">>},
{eq, <<"op">>, <<"get">>}]}))), 100);
get_metric(<<"kv_ep_cache_miss_ratio">>) ->
percent(_(<<"kv_ep_bg_fetched">>),
?cut(promQL:sum_without([<<"op">>, <<"result">>],
_({[{eq, <<"name">>, <<"kv_ops">>},
{eq, <<"op">>, <<"get">>}]}))), 0);
get_metric(<<"kv_ep_resident_items_ratio">>) ->
opposite_percent(_(<<"kv_ep_num_non_resident">>),
_(<<"kv_curr_items_tot">>));
get_metric(<<"kv_vb_avg_queue_age_seconds">>) ->
ratio(_(<<"kv_vb_queue_age_seconds">>), _(<<"kv_vb_queue_size">>));
%% Same ratio as above, but with the `state' label summed away on both sides.
get_metric(<<"kv_vb_avg_total_queue_age_seconds">>) ->
ratio(?cut(promQL:sum_without([<<"state">>],
_(<<"kv_vb_queue_age_seconds">>))),
?cut(promQL:sum_without([<<"state">>],
_(<<"kv_vb_queue_size">>))));
get_metric(<<"kv_avg_disk_time_seconds">>) ->
ratio(_(<<"kv_disk_seconds_sum">>), _(<<"kv_disk_seconds_count">>));
get_metric(<<"kv_avg_bg_wait_time_seconds">>) ->
ratio(_(<<"kv_bg_wait_seconds_sum">>), _(<<"kv_bg_wait_seconds_count">>));
get_metric(<<"kv_avg_timestamp_drift_seconds">>) ->
ratio(_(<<"kv_ep_hlc_drift_seconds">>), _(<<"kv_ep_hlc_drift_count">>));
get_metric(<<"kv_disk_write_queue">>) ->
sum([<<"kv_ep_flusher_todo">>, <<"kv_ep_queue_size">>]);
get_metric(<<"kv_ep_ops_create">>) ->
sum_across_labels(_(<<"kv_vb_ops_create">>), [<<"state">>]);
get_metric(<<"kv_ep_ops_update">>) ->
sum_across_labels(_(<<"kv_vb_ops_update">>), [<<"state">>]);
%% kv_ops restricted by a regex on the `op' label to the *_meta operations.
get_metric(<<"kv_xdc_ops">>) ->
sum_across_labels(
_(promQL:re(<<"op">>, <<"del_meta|get_meta|set_meta">>,
promQL:metric(<<"kv_ops">>))),
[<<"op">>, <<"result">>]);
get_metric(<<"kv_vb_resident_items_ratio">>) ->
opposite_percent(_(<<"kv_vb_num_non_resident">>),
_(<<"kv_vb_curr_items">>));
%% xdcr_* metrics: processed docs as a percentage of the union of processed
%% docs and changes left, summed without the `name' label.
get_metric(<<"xdcr_percent_completeness">>) ->
percent(_(<<"xdcr_docs_processed_total">>),
fun (M) ->
promQL:sum_without([<<"name">>],
{union, [M(<<"xdcr_docs_processed_total">>),
M(<<"xdcr_changes_left_total">>)]})
end, 100);
%% eventing_* metrics: sums over the listed success / failure counters.
get_metric(<<"eventing_processed_count">>) ->
sum([<<"eventing_timer_callback_success">>,
<<"eventing_on_delete_success">>,
<<"eventing_on_update_success">>]);
get_metric(<<"eventing_failed_count">>) ->
sum([<<"eventing_bucket_op_exception_count">>,
<<"eventing_checkpoint_failure_count">>,
<<"eventing_doc_timer_create_failure">>,
<<"eventing_n1ql_op_exception_count">>,
<<"eventing_non_doc_timer_create_failure">>,
<<"eventing_on_delete_failure">>,
<<"eventing_on_update_failure">>,
<<"eventing_timer_callback_failure">>,
<<"eventing_timeout_count">>]);
%% Used by unit tests:
%% This definition is spelled out by hand rather than built by a helper: it
%% queries m1 and m2 under the keys p1 and p2 and aggregates them as
%% sum(p1) * (sum(p2) + 1).
get_metric(<<"test_derived_metric">>) ->
[{aggregation_fun, fun (#{<<"p1">> := P1, <<"p2">> := P2}) ->
menelaus_web_stats:aggregate(sum, P1) *
(menelaus_web_stats:aggregate(sum, P2) + 1)
end},
{query, fun (M) ->
#{<<"p1">> => M(<<"m1">>), <<"p2">> => M(<<"m2">>)}
end}];
%% Anything else is not a derived metric.
get_metric(_) -> [].
%% Divide the summed Values1 by the summed Values2.
%% The denominator is aggregated first. When its total is the integer 0 the
%% caller-supplied DivisionByZeroDefault is returned and the numerator is never
%% aggregated. Any other total -- note that a float 0.0 does not match the
%% integer pattern -- is handed to the 'div' aggregation.
aggregated_ratio(Values1, Values2, DivisionByZeroDefault) ->
    DenominatorTotal = menelaus_web_stats:aggregate(sum, Values2),
    divide_by_total(Values1, DenominatorTotal, DivisionByZeroDefault).

%% Clause dispatch on the already-aggregated denominator total.
divide_by_total(_Values1, 0, DivisionByZeroDefault) ->
    DivisionByZeroDefault;
divide_by_total(Values1, Total, _DivisionByZeroDefault) ->
    NumeratorTotal = menelaus_web_stats:aggregate(sum, Values1),
    menelaus_web_stats:aggregate('div', [NumeratorTotal, Total]).
%% Build the definition of a metric that adds several named metrics together.
%% The selector is produced by promQL:eq_any/2 on the `name' label, and that
%% label is then removed by sum_across_labels/2.
%% NOTE(review): `_(...)' is a cut from cut.hrl; presumably it yields a fun
%% that applies its argument to the selector -- confirm against cut.hrl.
sum(MetricNames) ->
    AnyOfNames = _(promQL:eq_any(<<"name">>, MetricNames)),
    sum_across_labels(AnyOfNames, [<<"name">>]).
%% Build the definition of a metric summed with the given labels removed.
%%   Metric - fun taking the metric constructor M and returning a query term
%%   Labels - list of label names handed to promQL:sum_without/2
%% Returns a proplist with two funs:
%%   query           - maps M to #{<<"Param1">> => SummedQuery}
%%   aggregation_fun - sums the values found under <<"Param1">>
sum_across_labels(Metric, Labels) ->
    Aggregate = fun (#{<<"Param1">> := Values}) ->
                        menelaus_web_stats:aggregate(sum, Values)
                end,
    BuildQuery = fun (M) ->
                         Summed = promQL:sum_without(Labels, Metric(M)),
                         #{<<"Param1">> => Summed}
                 end,
    [{aggregation_fun, Aggregate}, {query, BuildQuery}].
%% Build the definition of a metric computed as numerator over denominator.
%%   Numerator, Denominator - funs taking the metric constructor M and
%%                            returning a query term
%%   Default                - value reported when the summed denominator is
%%                            the integer 0 (see aggregated_ratio/3)
%% The query fun places the two sub-queries under <<"Param1">> and
%% <<"Param2">>; the aggregation fun reads them back under the same keys.
ratio(Numerator, Denominator, Default) ->
    Aggregate = fun (#{<<"Param1">> := NumValues,
                       <<"Param2">> := DenValues}) ->
                        aggregated_ratio(NumValues, DenValues, Default)
                end,
    BuildQuery = fun (M) ->
                         #{<<"Param1">> => Numerator(M),
                           <<"Param2">> => Denominator(M)}
                 end,
    [{aggregation_fun, Aggregate}, {query, BuildQuery}].
%% Same as ratio/3, with `undefined' reported when the summed denominator is
%% the integer 0.
ratio(Numerator, Denominator) ->
    NoDefault = undefined,
    ratio(Numerator, Denominator, NoDefault).
%% Like ratio/3, but the numerator query is first passed through
%% promQL:multiply_by_scalar(_, 100), so the result reads as a percentage.
%% Default is forwarded to ratio/3 unchanged (it is not scaled).
%% NOTE(review): `?cut(...)' comes from cut.hrl; presumably it builds a fun
%% whose argument takes the place of `_' -- confirm against cut.hrl.
percent(Numerator, Denominator, Default) ->
    ScaledNumerator = ?cut(promQL:multiply_by_scalar(Numerator(_), 100)),
    ratio(ScaledNumerator, Denominator, Default).
%% Percentage of the denominator that is NOT covered by the numerator:
%% the numerator query becomes (Denominator - Numerator) and is then fed to
%% percent/3 against the same denominator. 100 is reported when the summed
%% denominator is the integer 0.
opposite_percent(Numerator, Denominator) ->
    Remainder = fun (M) ->
                        Whole = Denominator(M),
                        Part = Numerator(M),
                        promQL:op('-', [Whole, Part])
                end,
    percent(Remainder, Denominator, 100).