Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ResponseOps] Visualize alerting metrics in Stack Monitoring #123726

Merged
merged 125 commits into from
Jun 9, 2022
Merged
Show file tree
Hide file tree
Changes from 118 commits
Commits
Show all changes
125 commits
Select commit Hold shift + click to select a range
464035c
Add new plugin to collect additional kibana monitoring metrics
Jan 18, 2022
8b85956
Readme
Jan 18, 2022
c888390
Update generated document
Jan 18, 2022
83102fb
WIP
Jan 19, 2022
30e4364
Remove task manager and add support for max number
Jan 20, 2022
6d1dd61
Use MAX_SAFE_INTEGER
Jan 20, 2022
01b47ea
Merge remote-tracking branch 'elastic/main' into rops/monitoring_coll…
Jan 20, 2022
75a246c
We won't use this route
Jan 20, 2022
f299203
Merge remote-tracking branch 'origin/rops/monitoring_collection' into…
Jan 20, 2022
f356bb6
Tests and lint
Jan 20, 2022
f23b977
Merge branch 'main' into rops/monitoring_collection
kibanamachine Jan 24, 2022
ae29f8f
Track actions
Jan 24, 2022
1af47c3
Use dynamic route style
Jan 24, 2022
ff420a6
Merge remote-tracking branch 'origin/rops/monitoring_collection' into…
Jan 24, 2022
261b1b0
Initial attempt
Jan 25, 2022
3ccaef6
Merge remote-tracking branch 'elastic/main' into rops/monitoring_coll…
Jan 25, 2022
a1c3d07
Merge remote-tracking branch 'origin/rops/monitoring_collection' into…
Jan 25, 2022
4f55170
Fix test
Jan 25, 2022
cecd0d4
Merge remote-tracking branch 'origin/rops/rule_monitoring' into rops/…
Jan 25, 2022
072e16e
Fix some tests
Jan 25, 2022
99212a6
Couple small fixes
Jan 26, 2022
4e087a2
Merge branch 'main' into rops/monitoring_collection
kibanamachine Jan 28, 2022
965d7fb
Add in mapping verification
Jan 28, 2022
6ab55ea
Merge remote-tracking branch 'elastic/main' into rops/monitoring_coll…
Jan 28, 2022
c237843
Merge remote-tracking branch 'origin/rops/monitoring_collection' into…
Jan 28, 2022
d054eef
Adapt to new changes in base PR
Jan 28, 2022
f4dfa07
Merge remote-tracking branch 'origin/rops/rule_monitoring' into rops/…
Jan 28, 2022
c61c1f2
Merge remote-tracking branch 'elastic/main' into rops/monitoring_coll…
Jan 31, 2022
ff2ad0a
Fix types
Jan 31, 2022
feab3f4
Merge remote-tracking branch 'elastic/main' into rops/monitoring_coll…
Feb 11, 2022
f7a2870
Feedback from PR
Feb 11, 2022
b7effa9
PR feedback
Feb 11, 2022
a40ad0d
We do not need this
Feb 11, 2022
072c835
Merge remote-tracking branch 'elastic/main' into rops/monitoring_coll…
Feb 14, 2022
b769022
PR feedback
Mar 1, 2022
1c9e9a8
Match options to api/stats
Mar 1, 2022
01693da
Merge remote-tracking branch 'origin/rops/monitoring_collection' into…
Mar 1, 2022
7f0c5ec
Merge remote-tracking branch 'elastic/main' into rops/monitoring_coll…
chrisronline Mar 1, 2022
5637016
Merge remote-tracking branch 'origin/rops/monitoring_collection' into…
Mar 1, 2022
681b2cc
Remove internal collection support
Mar 2, 2022
b0de242
Merge remote-tracking branch 'elastic/main' into rops/monitoring_coll…
chrisronline Mar 2, 2022
66d5413
Fix api change
chrisronline Mar 2, 2022
5d70111
Merge remote-tracking branch 'origin/rops/monitoring_collection' into…
Mar 2, 2022
a12158e
Fix small issues
Mar 2, 2022
96f8a71
Merge remote-tracking branch 'elastic/main' into rops/rule_monitoring
Mar 9, 2022
f251f04
Separate cluster and node metrics
Mar 9, 2022
24d0915
Add more tests
Mar 9, 2022
539186b
Add retryAt in the test too
Mar 9, 2022
e5ac801
Merge remote-tracking branch 'elastic/main' into rops/rule_monitoring
Mar 9, 2022
81fdbee
Merge remote-tracking branch 'elastic/main' into rops/rule_monitoring
Mar 10, 2022
2094ed8
Add logging and use a class
Mar 10, 2022
8d8642d
Merge remote-tracking branch 'elastic/main' into rops/rule_monitoring
Mar 10, 2022
7c8912a
fix types
Mar 10, 2022
28b2064
Merge remote-tracking branch 'elastic/main' into rops/rule_monitoring
Mar 14, 2022
29c9bd2
Fix tests
Mar 14, 2022
f1562de
Merge remote-tracking branch 'elastic/main' into rops/rule_monitoring
Mar 15, 2022
937330c
Merge remote-tracking branch 'elastic/main' into rops/monitoring_ui_i…
Mar 15, 2022
7d93d3d
Fix bad merge
Mar 16, 2022
6b823c3
Merge remote-tracking branch 'elastic/main' into rops/monitoring_ui_i…
Mar 16, 2022
882a182
Separate these two out
Mar 16, 2022
fd72f34
Merge remote-tracking branch 'elastic/main' into rops/rule_monitoring
Mar 16, 2022
e3de0e9
Merge remote-tracking branch 'origin/rops/rule_monitoring' into rops/…
Mar 16, 2022
b925809
Update for new fields
Mar 16, 2022
98d2443
PR feedback
Mar 17, 2022
0f0070a
Merge remote-tracking branch 'origin/rops/rule_monitoring' into rops/…
Mar 17, 2022
10015f5
Update terminology and add timeouts
Mar 17, 2022
463dbe9
Add types
Mar 17, 2022
973c3b2
Merge remote-tracking branch 'origin/rops/rule_monitoring' into rops/…
Mar 17, 2022
dd54b92
Fix types
Mar 17, 2022
2e4156b
Merge remote-tracking branch 'origin/rops/rule_monitoring' into rops/…
Mar 17, 2022
8e6f8c4
Fix types and tests
Mar 17, 2022
911141e
Fix tests
Mar 17, 2022
f153012
Merge remote-tracking branch 'elastic/main' into rops/rule_monitoring
Mar 18, 2022
8ed6458
Linting fixes
Mar 18, 2022
4d2d0c7
Merge remote-tracking branch 'elastic/main' into rops/rule_monitoring
Mar 18, 2022
4ad5943
Merge remote-tracking branch 'elastic/main' into rops/rule_monitoring
Mar 21, 2022
1c24309
Merge remote-tracking branch 'origin/rops/rule_monitoring' into rops/…
Mar 21, 2022
bd1aa14
Merge remote-tracking branch 'elastic/main' into rops/monitoring_ui_i…
Mar 29, 2022
eb2b7b8
Merge remote-tracking branch 'elastic/main' into rops/monitoring_ui_i…
Mar 29, 2022
a37487f
Use MB fields directly
Mar 29, 2022
8be860a
Fix tests
Mar 29, 2022
cec2e43
Merge remote-tracking branch 'elastic/main' into rops/monitoring_ui_i…
Apr 21, 2022
ced1d72
Fix snapshot
Apr 21, 2022
7bb0de3
Merge remote-tracking branch 'elastic/main' into rops/monitoring_ui_i…
Apr 25, 2022
25b427f
Do not test for mappings for metricbeat-only metrics
Apr 25, 2022
0428ae0
Fix tests
Apr 26, 2022
fbdd07d
Merge remote-tracking branch 'elastic/main' into rops/monitoring_ui_i…
Apr 26, 2022
13a96ab
Merge remote-tracking branch 'elastic/main' into rops/monitoring_ui_i…
Apr 27, 2022
a17f5ca
PR feedback
Apr 27, 2022
8e8faad
Merge remote-tracking branch 'elastic/main' into rops/monitoring_ui_i…
Apr 29, 2022
e8cf770
Merge remote-tracking branch 'elastic/main' into rops/monitoring_ui_i…
May 16, 2022
a6dcfdb
[CI] Auto-commit changed files from 'node scripts/eslint --no-cache -…
kibanamachine May 16, 2022
ac18424
Add actions
May 16, 2022
446efcf
Fix tests
May 16, 2022
535410b
Merge remote-tracking branch 'elastic/main' into rops/monitoring_ui_i…
May 16, 2022
9206609
Merge remote-tracking branch 'elastic/main' into rops/monitoring_ui_i…
May 18, 2022
d5f8bad
Merge remote-tracking branch 'elastic/main' into rops/monitoring_ui_i…
May 19, 2022
d42ac2d
Merge remote-tracking branch 'elastic/main' into rops/monitoring_ui_i…
May 20, 2022
f6b36b5
Merge remote-tracking branch 'elastic/main' into rops/monitoring_ui_i…
May 23, 2022
f00f153
Merge remote-tracking branch 'elastic/main' into rops/monitoring_ui_i…
May 24, 2022
45a9d05
Fix lint
May 24, 2022
a570e59
Merge remote-tracking branch 'elastic/main' into rops/monitoring_ui_i…
May 25, 2022
5bd3315
Merge remote-tracking branch 'elastic/main' into rops/monitoring_ui_i…
May 25, 2022
625a01a
Merge branch 'main' into rops/monitoring_ui_integration
kibanamachine May 31, 2022
45805ed
Merge remote-tracking branch 'elastic/main' into rops/monitoring_ui_i…
May 31, 2022
744548c
Hide from internal monitoring
May 31, 2022
84f5663
Add tests
May 31, 2022
84d3925
Fix tests
May 31, 2022
0954fda
Merge remote-tracking branch 'elastic/main' into rops/monitoring_ui_i…
Jun 1, 2022
bc79df0
PR feedback
Jun 1, 2022
f4a7aa5
Remove mappings
Jun 1, 2022
aefc2e1
More defensive
Jun 1, 2022
abc45bf
Skip for now until other work is done
Jun 1, 2022
30de5bd
Update x-pack/plugins/monitoring/server/lib/metrics/kibana/metrics.ts
matschaffer Jun 2, 2022
08280eb
update unit and fix related tests
neptunian Jun 3, 2022
80d0950
update test snapshot related to changes in unit
neptunian Jun 3, 2022
d9001bf
Merge branch 'main' into rops/monitoring_ui_integration
neptunian Jun 3, 2022
214c425
unskip api integration tests and update for unit change
neptunian Jun 3, 2022
ab019aa
fix merge conflicts
neptunian Jun 8, 2022
52c1811
Merge branch 'main' into rops/monitoring_ui_integration
kibanamachine Jun 8, 2022
b1fc464
Merge branch 'main' into rops/monitoring_ui_integration
neptunian Jun 9, 2022
fd0512e
update fixtures from merge
neptunian Jun 9, 2022
92c7fa4
fix description of graphs
neptunian Jun 9, 2022
7ca8600
update test fixture
neptunian Jun 9, 2022
2c6b946
update fixture and snapshot from text change
neptunian Jun 9, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Expand Up @@ -20,7 +20,6 @@ let mockedLicenseState: jest.Mocked<ILicenseState>;
let ruleTypeRegistryParams: ConstructorOptions;

const taskManager = taskManagerMock.createSetup();

const inMemoryMetrics = inMemoryMetricsMock.create();

beforeEach(() => {
Expand Down
Expand Up @@ -37,6 +37,11 @@ import { RULE_KIBANA_VERSION_MISMATCH } from '../../../../common/constants';
const KibanaInstance = ({ data, alerts }: { data: any; alerts: any }) => {
const { zoomInfo, onBrush } = useCharts();

const showRules =
data.metrics.kibana_instance_rule_executions &&
data.metrics.kibana_instance_rule_executions.length &&
data.metrics.kibana_instance_rule_executions[0].indices_found.metricbeat;

return (
<EuiPage>
<EuiPageBody>
Expand Down Expand Up @@ -95,6 +100,42 @@ const KibanaInstance = ({ data, alerts }: { data: any; alerts: any }) => {
/>
<EuiSpacer />
</EuiFlexItem>
{showRules && (
<>
<EuiFlexItem grow={true}>
<MonitoringTimeseriesContainer
series={data.metrics.kibana_instance_rule_executions}
onBrush={onBrush}
zoomInfo={zoomInfo}
/>
<EuiSpacer />
</EuiFlexItem>
<EuiFlexItem grow={true}>
<MonitoringTimeseriesContainer
series={data.metrics.kibana_instance_rule_failures}
onBrush={onBrush}
zoomInfo={zoomInfo}
/>
<EuiSpacer />
</EuiFlexItem>
<EuiFlexItem grow={true}>
<MonitoringTimeseriesContainer
series={data.metrics.kibana_instance_action_executions}
onBrush={onBrush}
zoomInfo={zoomInfo}
/>
<EuiSpacer />
</EuiFlexItem>
<EuiFlexItem grow={true}>
<MonitoringTimeseriesContainer
series={data.metrics.kibana_instance_action_failures}
onBrush={onBrush}
zoomInfo={zoomInfo}
/>
<EuiSpacer />
</EuiFlexItem>
</>
)}
</EuiFlexGrid>
</EuiPageContent>
</EuiPageBody>
Expand Down
Expand Up @@ -33,6 +33,11 @@ const KibanaOverview = ({ data }: { data: any }) => {

if (!data) return null;

const showRules =
data.metrics.kibana_cluster_rule_overdue_count &&
data.metrics.kibana_cluster_rule_overdue_count.length &&
data.metrics.kibana_cluster_rule_overdue_count[0].indices_found.metricbeat;

return (
<EuiPage>
<EuiPageBody>
Expand All @@ -57,6 +62,42 @@ const KibanaOverview = ({ data }: { data: any }) => {
/>
</EuiFlexItem>
</EuiFlexGroup>
{showRules && (
<>
<EuiFlexGroup>
<EuiFlexItem grow={true}>
<MonitoringTimeseriesContainer
series={data.metrics.kibana_cluster_rule_overdue_count}
onBrush={onBrush}
zoomInfo={zoomInfo}
/>
</EuiFlexItem>
<EuiFlexItem grow={true}>
<MonitoringTimeseriesContainer
series={data.metrics.kibana_cluster_rule_overdue_duration}
onBrush={onBrush}
zoomInfo={zoomInfo}
/>
</EuiFlexItem>
</EuiFlexGroup>
<EuiFlexGroup>
<EuiFlexItem grow={true}>
<MonitoringTimeseriesContainer
series={data.metrics.kibana_cluster_action_overdue_count}
onBrush={onBrush}
zoomInfo={zoomInfo}
/>
</EuiFlexItem>
<EuiFlexItem grow={true}>
<MonitoringTimeseriesContainer
series={data.metrics.kibana_cluster_action_overdue_duration}
onBrush={onBrush}
zoomInfo={zoomInfo}
/>
</EuiFlexItem>
</EuiFlexGroup>
</>
)}
</EuiPageContent>
</EuiPageBody>
</EuiPage>
Expand Down
Expand Up @@ -6,7 +6,7 @@
*/

import React from 'react';
import { formatNumber } from '../../../lib/format_number';
import { formatNumber, formatPercentageUsage } from '../../../lib/format_number';
import {
ClusterItemContainer,
HealthStatusIndicator,
Expand Down Expand Up @@ -136,6 +136,31 @@ export function KibanaPanel(props) {
values={{ maxTime: props.response_time_max }}
/>
</EuiDescriptionListDescription>
{props.rules.instance && props.rules.cluster && (
<>
<EuiDescriptionListTitle className="eui-textBreakWord">
<FormattedMessage
id="xpack.monitoring.cluster.overview.kibanaPanel.ruleFailuresLabel"
defaultMessage="Rule Success Ratio"
/>
</EuiDescriptionListTitle>
<EuiDescriptionListDescription data-test-subj="kbnRuleFailures">
{formatPercentageUsage(
props.rules.instance.executions - props.rules.instance.failures,
props.rules.instance.executions
)}
</EuiDescriptionListDescription>
<EuiDescriptionListTitle className="eui-textBreakWord">
<FormattedMessage
id="xpack.monitoring.cluster.overview.kibanaPanel.overdueTaskCountLabel"
defaultMessage="Overdue Rules"
/>
</EuiDescriptionListTitle>
<EuiDescriptionListDescription data-test-subj="kbnOverdueRules">
{props.rules.cluster.overdue.count}
</EuiDescriptionListDescription>
</>
)}
</EuiDescriptionList>
</EuiPanel>
</EuiFlexItem>
Expand Down
Expand Up @@ -70,6 +70,7 @@ export class BulkUploader implements IBulkUploader {
private _timer: NodeJS.Timer | null;
private readonly _interval: number;
private readonly config: MonitoringConfig;

constructor({
log,
config,
Expand Down
Expand Up @@ -6,7 +6,7 @@
*/

import { notFound } from '@hapi/boom';
import { get } from 'lodash';
import { get, omit } from 'lodash';
import { set } from '@elastic/safer-lodash-set';
import { i18n } from '@kbn/i18n';
import { getClustersStats } from './get_clusters_stats';
Expand Down Expand Up @@ -39,6 +39,7 @@ import { getLogTypes } from '../logs';
import { isInCodePath } from './is_in_code_path';
import { LegacyRequest, Cluster } from '../../types';
import { RulesByType } from '../../../common/types/alerts';
import { getClusterRuleDataForClusters, getInstanceRuleDataForClusters } from '../kibana/rules';

/**
* Get all clusters or the cluster associated with {@code clusterUuid} when it is defined.
Expand Down Expand Up @@ -168,17 +169,38 @@ export async function getClustersFromRequest(
}
}
// add kibana data
const kibanas =
const [kibanas, kibanaClusterRules, kibanaInstanceRules] =
isInCodePath(codePaths, [CODE_PATH_KIBANA]) && !isStandaloneCluster
? await getKibanasForClusters(req, clusters, CCS_REMOTE_PATTERN)
: [];
? await Promise.all([
getKibanasForClusters(req, clusters, CCS_REMOTE_PATTERN),
getClusterRuleDataForClusters(req, clusters, CCS_REMOTE_PATTERN),
getInstanceRuleDataForClusters(req, clusters, CCS_REMOTE_PATTERN),
])
: [[], [], []];
// add the kibana data to each cluster
kibanas.forEach((kibana) => {
const clusterIndex = clusters.findIndex(
(cluster) =>
get(cluster, 'elasticsearch.cluster.id', cluster.cluster_uuid) === kibana.clusterUuid
);
set(clusters[clusterIndex], 'kibana', kibana.stats);

const clusterKibanaRules = kibanaClusterRules.every((rule) => !Boolean(rule))
? null
: kibanaClusterRules?.find((rule) => rule?.clusterUuid === kibana.clusterUuid);
const instanceKibanaRules = kibanaInstanceRules.every((rule) => !Boolean(rule))
? null
: kibanaInstanceRules?.find((rule) => rule?.clusterUuid === kibana.clusterUuid);
set(
clusters[clusterIndex],
'kibana.rules.cluster',
clusterKibanaRules ? omit(clusterKibanaRules, 'clusterUuid') : null
);
set(
clusters[clusterIndex],
'kibana.rules.instance',
instanceKibanaRules ? omit(instanceKibanaRules, 'clusterUuid') : null
);
});

// add logstash data
Expand Down
44 changes: 38 additions & 6 deletions x-pack/plugins/monitoring/server/lib/details/get_series.ts
Expand Up @@ -18,13 +18,16 @@ import {
CALCULATE_DURATION_UNTIL,
INDEX_PATTERN_TYPES,
STANDALONE_CLUSTER_CLUSTER_UUID,
METRICBEAT_INDEX_NAME_UNIQUE_TOKEN,
} from '../../../common/constants';
import { formatUTCTimestampForTimezone } from '../format_timezone';
import { getNewIndexPatterns } from '../cluster/get_index_patterns';
import { Globals } from '../../static_globals';
import type { Metric } from '../metrics/metrics';

type SeriesBucket = Bucket & { metric_mb_deriv?: { normalized_value: number } };
type SeriesBucket = Bucket & { metric_mb_deriv?: { normalized_value: number } } & {
indices?: { buckets: Array<{ [key: string]: any }> };
};

/**
* Derivative metrics for the first two agg buckets are unusable. For the first bucket, there
Expand Down Expand Up @@ -152,6 +155,11 @@ async function fetchSeries(
},
aggs: {
...dateHistogramSubAggs,
indices: {
terms: {
field: '_index',
},
},
},
},
};
Expand Down Expand Up @@ -267,7 +275,7 @@ function handleSeries(
timezone: string,
response: ElasticsearchResponse
) {
const { derivative, calculation: customCalculation } = metric;
const { derivative, calculation: customCalculation, isNotSupportedInInternalCollection } = metric;

function getAggregatedData(buckets: SeriesBucket[]) {
const firstUsableBucketIndex = findFirstUsableBucketIndex(buckets, min);
Expand All @@ -277,28 +285,52 @@ function handleSeries(
firstUsableBucketIndex,
bucketSizeInSeconds * 1000
);
let internalIndicesFound = false;
let mbIndicesFound = false;
let data: Array<[string | number, number | null]> = [];

if (firstUsableBucketIndex <= lastUsableBucketIndex) {
// map buckets to values for charts
const key = derivative ? 'metric_deriv.normalized_value' : 'metric.value';
const calculation = customCalculation !== undefined ? customCalculation : defaultCalculation;
const usableBuckets = buckets.slice(firstUsableBucketIndex, lastUsableBucketIndex + 1); // take only the buckets we know are usable

data = usableBuckets.map((bucket) => {
// map buckets to X/Y coords for Flot charting
if (bucket.indices) {
for (const indexBucket of bucket.indices.buckets) {
if (indexBucket.key.includes(METRICBEAT_INDEX_NAME_UNIQUE_TOKEN)) {
mbIndicesFound = true;
} else {
internalIndicesFound = true;
}
}
}

data = buckets
.slice(firstUsableBucketIndex, lastUsableBucketIndex + 1) // take only the buckets we know are usable
.map((bucket) => [
return [
formatUTCTimestampForTimezone(bucket.key, timezone),
calculation(bucket, key, metric, bucketSizeInSeconds),
]); // map buckets to X/Y coords for Flot charting
];
});
}

const indexSourceData = isNotSupportedInInternalCollection
? {
indices_found: {
internal: internalIndicesFound,
metricbeat: mbIndicesFound,
},
}
: {};

return {
bucket_size: formatBucketSize(bucketSizeInSeconds),
timeRange: {
min: formatUTCTimestampForTimezone(min, timezone),
max: formatUTCTimestampForTimezone(max, timezone),
},
metric: metric.serialize(),
...indexSourceData,
data,
};
}
Expand Down