From d7be514b94dd04272be583802d0bcfa9d2dd256f Mon Sep 17 00:00:00 2001 From: Walter Rafelsberger Date: Wed, 21 Dec 2022 22:37:17 +0100 Subject: [PATCH] [ML] Explain Log Rate Spikes: Additional unit tests. (#147451) This breaks out inline code in `x-pack/plugins/aiops/server/routes/explain_log_rate_spikes.ts` to functions in separate files and adds jest unit tests for each function. The mocks used for jest unit tests are used as expected data in the API integration tests. This allows us to make sure that the static mocks are still up to date should we have to update API integration tests based on upstream tests. --- .../artificial_logs/change_point_groups.ts | 20 ++ .../artificial_logs/change_points.ts | 53 ++++++ .../__mocks__/artificial_logs/fields.ts | 8 + .../filtered_frequent_items.ts | 27 +++ .../final_change_point_groups.ts | 27 +++ .../artificial_logs/frequent_items.ts | 59 ++++++ .../farequote/change_point_groups.ts | 41 +++++ x-pack/plugins/aiops/common/types.ts | 33 ++++ .../application/utils/query_utils.test.ts | 2 +- .../public/application/utils/query_utils.ts | 2 +- .../explain_log_rate_spikes_analysis.tsx | 42 +---- .../explain_log_rate_spikes_page.tsx | 2 +- .../get_group_table_items.test.ts | 53 ++++++ .../get_group_table_items.ts | 40 ++++ .../components/spike_analysis_table/index.ts | 1 + .../spike_analysis_table_groups.tsx | 10 +- .../spike_analysis_table_row_provider.tsx | 2 +- .../components/spike_analysis_table/types.ts | 17 ++ .../aiops/public/get_document_stats.ts | 2 +- x-pack/plugins/aiops/public/hooks/use_data.ts | 2 +- .../server/routes/explain_log_rate_spikes.ts | 173 +----------------- .../routes/queries/duplicate_identifier.ts | 18 ++ .../routes/queries/fetch_frequent_items.ts | 17 +- .../queries/get_change_point_groups.test.ts | 21 +++ .../routes/queries/get_change_point_groups.ts | 74 ++++++++ .../get_field_value_pair_counts.test.ts | 50 +++++ .../queries/get_field_value_pair_counts.ts | 25 +++ .../get_filtered_frequent_items.test.ts | 
20 ++ .../queries/get_filtered_frequent_items.ts | 47 +++++ .../routes/queries/get_group_filter.test.ts | 25 +-- ...get_groups_with_readded_duplicates.test.ts | 55 ++++++ .../get_groups_with_readded_duplicates.ts | 47 +++++ .../queries/get_marked_duplicates.test.ts | 91 +++++++++ .../routes/queries/get_marked_duplicates.ts | 30 +++ .../queries/get_missing_change_points.test.ts | 52 ++++++ .../queries/get_missing_change_points.ts | 19 ++ .../get_simple_hierarchical_tree.test.ts | 133 +++++--------- .../queries/get_simple_hierarchical_tree.ts | 131 +------------ ...et_simple_hierarchical_tree_leaves.test.ts | 35 ++++ .../get_simple_hierarchical_tree_leaves.ts | 42 +++++ .../routes/queries/get_value_counts.test.ts | 31 ++++ .../server/routes/queries/get_value_counts.ts | 18 ++ .../queries/get_values_descending.test.ts | 23 +++ .../routes/queries/get_values_descending.ts | 19 ++ .../transform_change_point_to_group.test.ts | 48 +++++ .../transform_change_point_to_group.ts | 55 ++++++ .../api_integration/apis/aiops/test_data.ts | 72 +------- 47 files changed, 1287 insertions(+), 527 deletions(-) create mode 100644 x-pack/plugins/aiops/common/__mocks__/artificial_logs/change_point_groups.ts create mode 100644 x-pack/plugins/aiops/common/__mocks__/artificial_logs/change_points.ts create mode 100644 x-pack/plugins/aiops/common/__mocks__/artificial_logs/fields.ts create mode 100644 x-pack/plugins/aiops/common/__mocks__/artificial_logs/filtered_frequent_items.ts create mode 100644 x-pack/plugins/aiops/common/__mocks__/artificial_logs/final_change_point_groups.ts create mode 100644 x-pack/plugins/aiops/common/__mocks__/artificial_logs/frequent_items.ts create mode 100644 x-pack/plugins/aiops/common/__mocks__/farequote/change_point_groups.ts create mode 100644 x-pack/plugins/aiops/common/types.ts create mode 100644 x-pack/plugins/aiops/public/components/spike_analysis_table/get_group_table_items.test.ts create mode 100644 
x-pack/plugins/aiops/public/components/spike_analysis_table/get_group_table_items.ts create mode 100644 x-pack/plugins/aiops/public/components/spike_analysis_table/types.ts create mode 100644 x-pack/plugins/aiops/server/routes/queries/duplicate_identifier.ts create mode 100644 x-pack/plugins/aiops/server/routes/queries/get_change_point_groups.test.ts create mode 100644 x-pack/plugins/aiops/server/routes/queries/get_change_point_groups.ts create mode 100644 x-pack/plugins/aiops/server/routes/queries/get_field_value_pair_counts.test.ts create mode 100644 x-pack/plugins/aiops/server/routes/queries/get_field_value_pair_counts.ts create mode 100644 x-pack/plugins/aiops/server/routes/queries/get_filtered_frequent_items.test.ts create mode 100644 x-pack/plugins/aiops/server/routes/queries/get_filtered_frequent_items.ts create mode 100644 x-pack/plugins/aiops/server/routes/queries/get_groups_with_readded_duplicates.test.ts create mode 100644 x-pack/plugins/aiops/server/routes/queries/get_groups_with_readded_duplicates.ts create mode 100644 x-pack/plugins/aiops/server/routes/queries/get_marked_duplicates.test.ts create mode 100644 x-pack/plugins/aiops/server/routes/queries/get_marked_duplicates.ts create mode 100644 x-pack/plugins/aiops/server/routes/queries/get_missing_change_points.test.ts create mode 100644 x-pack/plugins/aiops/server/routes/queries/get_missing_change_points.ts create mode 100644 x-pack/plugins/aiops/server/routes/queries/get_simple_hierarchical_tree_leaves.test.ts create mode 100644 x-pack/plugins/aiops/server/routes/queries/get_simple_hierarchical_tree_leaves.ts create mode 100644 x-pack/plugins/aiops/server/routes/queries/get_value_counts.test.ts create mode 100644 x-pack/plugins/aiops/server/routes/queries/get_value_counts.ts create mode 100644 x-pack/plugins/aiops/server/routes/queries/get_values_descending.test.ts create mode 100644 x-pack/plugins/aiops/server/routes/queries/get_values_descending.ts create mode 100644 
x-pack/plugins/aiops/server/routes/queries/transform_change_point_to_group.test.ts create mode 100644 x-pack/plugins/aiops/server/routes/queries/transform_change_point_to_group.ts diff --git a/x-pack/plugins/aiops/common/__mocks__/artificial_logs/change_point_groups.ts b/x-pack/plugins/aiops/common/__mocks__/artificial_logs/change_point_groups.ts new file mode 100644 index 00000000000000..9b580ac36ce262 --- /dev/null +++ b/x-pack/plugins/aiops/common/__mocks__/artificial_logs/change_point_groups.ts @@ -0,0 +1,20 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import type { ChangePointGroup } from '@kbn/ml-agg-utils'; + +export const changePointGroups: ChangePointGroup[] = [ + { + id: '2038579476', + group: [ + { fieldName: 'response_code', fieldValue: '500' }, + { fieldName: 'url', fieldValue: 'home.php' }, + ], + docCount: 792, + pValue: 0.010770456205312423, + }, +]; diff --git a/x-pack/plugins/aiops/common/__mocks__/artificial_logs/change_points.ts b/x-pack/plugins/aiops/common/__mocks__/artificial_logs/change_points.ts new file mode 100644 index 00000000000000..5b4597a2ecd8a0 --- /dev/null +++ b/x-pack/plugins/aiops/common/__mocks__/artificial_logs/change_points.ts @@ -0,0 +1,53 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +export const changePoints = [ + { + fieldName: 'response_code', + fieldValue: '500', + doc_count: 1821, + bg_count: 553, + total_doc_count: 4671, + total_bg_count: 1975, + score: 26.546201745993947, + pValue: 2.9589053032077285e-12, + normalizedScore: 0.7814127409489161, + }, + { + fieldName: 'url', + fieldValue: 'home.php', + doc_count: 1742, + bg_count: 632, + total_doc_count: 4671, + total_bg_count: 1975, + score: 4.53094842981472, + pValue: 0.010770456205312423, + normalizedScore: 0.10333028878375965, + }, + { + fieldName: 'url', + fieldValue: 'login.php', + doc_count: 1742, + bg_count: 632, + total_doc_count: 4671, + total_bg_count: 1975, + score: 4.53094842981472, + pValue: 0.010770456205312423, + normalizedScore: 0.10333028878375965, + }, + { + fieldName: 'user', + fieldValue: 'Peter', + doc_count: 1981, + bg_count: 553, + total_doc_count: 4671, + total_bg_count: 1975, + score: 47.34435085428873, + pValue: 2.7454255728359757e-21, + normalizedScore: 0.8327337555873047, + }, +]; diff --git a/x-pack/plugins/aiops/common/__mocks__/artificial_logs/fields.ts b/x-pack/plugins/aiops/common/__mocks__/artificial_logs/fields.ts new file mode 100644 index 00000000000000..bb42e8a6048e7b --- /dev/null +++ b/x-pack/plugins/aiops/common/__mocks__/artificial_logs/fields.ts @@ -0,0 +1,8 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +export const fields = ['response_code', 'url', 'user']; diff --git a/x-pack/plugins/aiops/common/__mocks__/artificial_logs/filtered_frequent_items.ts b/x-pack/plugins/aiops/common/__mocks__/artificial_logs/filtered_frequent_items.ts new file mode 100644 index 00000000000000..268516f95542df --- /dev/null +++ b/x-pack/plugins/aiops/common/__mocks__/artificial_logs/filtered_frequent_items.ts @@ -0,0 +1,27 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import type { ItemsetResult } from '../../types'; + +export const filteredFrequentItems: ItemsetResult[] = [ + { + set: { response_code: '500', url: 'home.php' }, + size: 2, + maxPValue: 0.010770456205312423, + doc_count: 792, + support: 0.5262458471760797, + total_doc_count: 1505, + }, + { + set: { user: 'Peter', url: 'home.php' }, + size: 2, + maxPValue: 0.010770456205312423, + doc_count: 634, + support: 0.4212624584717608, + total_doc_count: 1505, + }, +]; diff --git a/x-pack/plugins/aiops/common/__mocks__/artificial_logs/final_change_point_groups.ts b/x-pack/plugins/aiops/common/__mocks__/artificial_logs/final_change_point_groups.ts new file mode 100644 index 00000000000000..dce10c1280985c --- /dev/null +++ b/x-pack/plugins/aiops/common/__mocks__/artificial_logs/final_change_point_groups.ts @@ -0,0 +1,27 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +import type { ChangePointGroup } from '@kbn/ml-agg-utils'; + +export const finalChangePointGroups: ChangePointGroup[] = [ + { + id: '2038579476', + group: [ + { fieldName: 'response_code', fieldValue: '500', duplicate: false }, + { fieldName: 'url', fieldValue: 'home.php', duplicate: false }, + { fieldName: 'url', fieldValue: 'login.php', duplicate: false }, + ], + docCount: 792, + pValue: 0.010770456205312423, + }, + { + id: '817080373', + group: [{ fieldName: 'user', fieldValue: 'Peter', duplicate: false }], + docCount: 1981, + pValue: 2.7454255728359757e-21, + }, +]; diff --git a/x-pack/plugins/aiops/common/__mocks__/artificial_logs/frequent_items.ts b/x-pack/plugins/aiops/common/__mocks__/artificial_logs/frequent_items.ts new file mode 100644 index 00000000000000..fe61a60a1afbe5 --- /dev/null +++ b/x-pack/plugins/aiops/common/__mocks__/artificial_logs/frequent_items.ts @@ -0,0 +1,59 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +import type { ItemsetResult } from '../../types'; + +export const frequentItems: ItemsetResult[] = [ + { + set: { response_code: '500', url: 'home.php' }, + size: 2, + maxPValue: 0.010770456205312423, + doc_count: 792, + support: 0.5262458471760797, + total_doc_count: 1505, + }, + { + set: { user: 'Peter', url: 'home.php' }, + size: 2, + maxPValue: 0.010770456205312423, + doc_count: 634, + support: 0.4212624584717608, + total_doc_count: 1505, + }, + { + set: { response_code: '500', user: 'Mary', url: 'home.php' }, + size: 3, + maxPValue: 0.010770456205312423, + doc_count: 396, + support: 0.26312292358803985, + total_doc_count: 1505, + }, + { + set: { response_code: '500', user: 'Paul', url: 'home.php' }, + size: 3, + maxPValue: 0.010770456205312423, + doc_count: 396, + support: 0.26312292358803985, + total_doc_count: 1505, + }, + { + set: { response_code: '404', user: 'Peter', url: 'home.php' }, + size: 3, + maxPValue: 0.010770456205312423, + doc_count: 317, + support: 0.2106312292358804, + total_doc_count: 1505, + }, + { + set: { response_code: '200', user: 'Peter', url: 'home.php' }, + size: 3, + maxPValue: 0.010770456205312423, + doc_count: 317, + support: 0.2106312292358804, + total_doc_count: 1505, + }, +]; diff --git a/x-pack/plugins/aiops/common/__mocks__/farequote/change_point_groups.ts b/x-pack/plugins/aiops/common/__mocks__/farequote/change_point_groups.ts new file mode 100644 index 00000000000000..b30303e384f848 --- /dev/null +++ b/x-pack/plugins/aiops/common/__mocks__/farequote/change_point_groups.ts @@ -0,0 +1,41 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +import type { ChangePointGroup } from '@kbn/ml-agg-utils'; + +export const changePointGroups: ChangePointGroup[] = [ + { + id: 'group-1', + group: [ + { + fieldName: 'custom_field.keyword', + fieldValue: 'deviation', + }, + { + fieldName: 'airline', + fieldValue: 'UAL', + }, + ], + docCount: 101, + pValue: 0.01, + }, + { + id: 'group-2', + group: [ + { + fieldName: 'custom_field.keyword', + fieldValue: 'deviation', + }, + { + fieldName: 'airline', + fieldValue: 'AAL', + }, + ], + docCount: 49, + pValue: 0.001, + }, +]; diff --git a/x-pack/plugins/aiops/common/types.ts b/x-pack/plugins/aiops/common/types.ts new file mode 100644 index 00000000000000..0acb3d07883ad2 --- /dev/null +++ b/x-pack/plugins/aiops/common/types.ts @@ -0,0 +1,33 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +import type { ChangePoint, FieldValuePair } from '@kbn/ml-agg-utils'; + +export interface ChangePointDuplicateGroup { + keys: Pick<ChangePoint, keyof ChangePoint>; + group: ChangePoint[]; +} + +export type FieldValuePairCounts = Record<string, Record<string, number>>; + +export interface ItemsetResult { + set: Record<FieldValuePair['fieldName'], FieldValuePair['fieldValue']>; + size: number; + maxPValue: number; + doc_count: number; + support: number; + total_doc_count: number; +} + +export interface SimpleHierarchicalTreeNode { + name: string; + set: FieldValuePair[]; + docCount: number; + pValue: number | null; + children: SimpleHierarchicalTreeNode[]; + addNode: (node: SimpleHierarchicalTreeNode) => void; +} diff --git a/x-pack/plugins/aiops/public/application/utils/query_utils.test.ts b/x-pack/plugins/aiops/public/application/utils/query_utils.test.ts index c886b16fa0ec2f..7a2650f02d3a96 100644 --- a/x-pack/plugins/aiops/public/application/utils/query_utils.test.ts +++ b/x-pack/plugins/aiops/public/application/utils/query_utils.test.ts @@ -7,7 +7,7 @@ import type { ChangePoint } from '@kbn/ml-agg-utils'; -import type { GroupTableItem } from '../../components/spike_analysis_table/spike_analysis_table_groups'; +import type { GroupTableItem } from '../../components/spike_analysis_table/types'; import { buildBaseFilterCriteria } from './query_utils'; diff --git a/x-pack/plugins/aiops/public/application/utils/query_utils.ts b/x-pack/plugins/aiops/public/application/utils/query_utils.ts index 0c0363d852bc96..94f5bc4d9f70b2 100644 --- a/x-pack/plugins/aiops/public/application/utils/query_utils.ts +++ b/x-pack/plugins/aiops/public/application/utils/query_utils.ts @@ -16,7 +16,7 @@ import type * as estypes from '@elastic/elasticsearch/lib/api/typesWithBodyKey'; import { Query } from '@kbn/es-query'; import { isPopulatedObject } from '@kbn/ml-is-populated-object'; import type { ChangePoint, FieldValuePair } from '@kbn/ml-agg-utils'; -import type { GroupTableItem } from '../../components/spike_analysis_table/spike_analysis_table_groups'; +import type { GroupTableItem } from 
'../../components/spike_analysis_table/types'; /* * Contains utility functions for building and processing queries. diff --git a/x-pack/plugins/aiops/public/components/explain_log_rate_spikes/explain_log_rate_spikes_analysis.tsx b/x-pack/plugins/aiops/public/components/explain_log_rate_spikes/explain_log_rate_spikes_analysis.tsx index e84f50b02711c3..b458e1da73516d 100644 --- a/x-pack/plugins/aiops/public/components/explain_log_rate_spikes/explain_log_rate_spikes_analysis.tsx +++ b/x-pack/plugins/aiops/public/components/explain_log_rate_spikes/explain_log_rate_spikes_analysis.tsx @@ -25,14 +25,17 @@ import type { WindowParameters } from '@kbn/aiops-utils'; import { i18n } from '@kbn/i18n'; import { FormattedMessage } from '@kbn/i18n-react'; import type { Query } from '@kbn/es-query'; -import type { FieldValuePair } from '@kbn/ml-agg-utils'; import { useAiopsAppContext } from '../../hooks/use_aiops_app_context'; import { initialState, streamReducer } from '../../../common/api/stream_reducer'; import type { ApiExplainLogRateSpikes } from '../../../common/api'; -import { SpikeAnalysisGroupsTable } from '../spike_analysis_table'; -import { SpikeAnalysisTable } from '../spike_analysis_table'; +import { + getGroupTableItems, + SpikeAnalysisTable, + SpikeAnalysisGroupsTable, +} from '../spike_analysis_table'; +import {} from '../spike_analysis_table'; import { useSpikeAnalysisTableRowContext } from '../spike_analysis_table/spike_analysis_table_row_provider'; const groupResultsMessage = i18n.translate( @@ -159,35 +162,10 @@ export const ExplainLogRateSpikesAnalysis: FC // eslint-disable-next-line react-hooks/exhaustive-deps }, []); - const groupTableItems = useMemo(() => { - const tableItems = data.changePointsGroups.map(({ id, group, docCount, histogram, pValue }) => { - const sortedGroup = group.sort((a, b) => - a.fieldName > b.fieldName ? 1 : b.fieldName > a.fieldName ? 
-1 : 0 - ); - const dedupedGroup: FieldValuePair[] = []; - const repeatedValues: FieldValuePair[] = []; - - sortedGroup.forEach((pair) => { - const { fieldName, fieldValue } = pair; - if (pair.duplicate === false) { - dedupedGroup.push({ fieldName, fieldValue }); - } else { - repeatedValues.push({ fieldName, fieldValue }); - } - }); - - return { - id, - docCount, - pValue, - group: dedupedGroup, - repeatedValues, - histogram, - }; - }); - - return tableItems; - }, [data.changePointsGroups]); + const groupTableItems = useMemo( + () => getGroupTableItems(data.changePointsGroups), + [data.changePointsGroups] + ); const shouldRerunAnalysis = useMemo( () => diff --git a/x-pack/plugins/aiops/public/components/explain_log_rate_spikes/explain_log_rate_spikes_page.tsx b/x-pack/plugins/aiops/public/components/explain_log_rate_spikes/explain_log_rate_spikes_page.tsx index 6595a9f68db94d..a6327331a51050 100644 --- a/x-pack/plugins/aiops/public/components/explain_log_rate_spikes/explain_log_rate_spikes_page.tsx +++ b/x-pack/plugins/aiops/public/components/explain_log_rate_spikes/explain_log_rate_spikes_page.tsx @@ -38,7 +38,7 @@ import { SearchPanel } from '../search_panel'; import { restorableDefaults } from './explain_log_rate_spikes_app_state'; import { ExplainLogRateSpikesAnalysis } from './explain_log_rate_spikes_analysis'; -import type { GroupTableItem } from '../spike_analysis_table/spike_analysis_table_groups'; +import type { GroupTableItem } from '../spike_analysis_table/types'; import { useSpikeAnalysisTableRowContext } from '../spike_analysis_table/spike_analysis_table_row_provider'; // TODO port to `@emotion/react` once `useEuiBreakpoint` is available https://github.com/elastic/eui/pull/6057 diff --git a/x-pack/plugins/aiops/public/components/spike_analysis_table/get_group_table_items.test.ts b/x-pack/plugins/aiops/public/components/spike_analysis_table/get_group_table_items.test.ts new file mode 100644 index 00000000000000..8807479da12420 --- /dev/null +++ 
b/x-pack/plugins/aiops/public/components/spike_analysis_table/get_group_table_items.test.ts @@ -0,0 +1,53 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import { finalChangePointGroups } from '../../../common/__mocks__/artificial_logs/final_change_point_groups'; + +import { getGroupTableItems } from './get_group_table_items'; + +describe('getGroupTableItems', () => { + it('transforms change point groups into table items', () => { + const groupTableItems = getGroupTableItems(finalChangePointGroups); + + expect(groupTableItems).toEqual([ + { + docCount: 792, + group: [ + { + fieldName: 'response_code', + fieldValue: '500', + }, + { + fieldName: 'url', + fieldValue: 'home.php', + }, + { + fieldName: 'url', + fieldValue: 'login.php', + }, + ], + histogram: undefined, + id: '2038579476', + pValue: 0.010770456205312423, + repeatedValues: [], + }, + { + docCount: 1981, + group: [ + { + fieldName: 'user', + fieldValue: 'Peter', + }, + ], + histogram: undefined, + id: '817080373', + pValue: 2.7454255728359757e-21, + repeatedValues: [], + }, + ]); + }); +}); diff --git a/x-pack/plugins/aiops/public/components/spike_analysis_table/get_group_table_items.ts b/x-pack/plugins/aiops/public/components/spike_analysis_table/get_group_table_items.ts new file mode 100644 index 00000000000000..9135a5449e504e --- /dev/null +++ b/x-pack/plugins/aiops/public/components/spike_analysis_table/get_group_table_items.ts @@ -0,0 +1,40 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +import type { ChangePointGroup, FieldValuePair } from '@kbn/ml-agg-utils'; + +import type { GroupTableItem } from './types'; + +export function getGroupTableItems(changePointsGroups: ChangePointGroup[]): GroupTableItem[] { + const tableItems = changePointsGroups.map(({ id, group, docCount, histogram, pValue }) => { + const sortedGroup = [...group].sort((a, b) => + a.fieldName > b.fieldName ? 1 : b.fieldName > a.fieldName ? -1 : 0 + ); + const dedupedGroup: FieldValuePair[] = []; + const repeatedValues: FieldValuePair[] = []; + + sortedGroup.forEach((pair) => { + const { fieldName, fieldValue } = pair; + if (pair.duplicate === false) { + dedupedGroup.push({ fieldName, fieldValue }); + } else { + repeatedValues.push({ fieldName, fieldValue }); + } + }); + + return { + id, + docCount, + pValue, + group: dedupedGroup, + repeatedValues, + histogram, + }; + }); + + return tableItems; +} diff --git a/x-pack/plugins/aiops/public/components/spike_analysis_table/index.ts b/x-pack/plugins/aiops/public/components/spike_analysis_table/index.ts index 120b15b03364f1..39452f67d1d6ca 100644 --- a/x-pack/plugins/aiops/public/components/spike_analysis_table/index.ts +++ b/x-pack/plugins/aiops/public/components/spike_analysis_table/index.ts @@ -5,5 +5,6 @@ * 2.0. 
*/ +export { getGroupTableItems } from './get_group_table_items'; export { SpikeAnalysisTable } from './spike_analysis_table'; export { SpikeAnalysisGroupsTable } from './spike_analysis_table_groups'; diff --git a/x-pack/plugins/aiops/public/components/spike_analysis_table/spike_analysis_table_groups.tsx b/x-pack/plugins/aiops/public/components/spike_analysis_table/spike_analysis_table_groups.tsx index 25518598537714..642f09c66765c3 100644 --- a/x-pack/plugins/aiops/public/components/spike_analysis_table/spike_analysis_table_groups.tsx +++ b/x-pack/plugins/aiops/public/components/spike_analysis_table/spike_analysis_table_groups.tsx @@ -37,6 +37,7 @@ import { MiniHistogram } from '../mini_histogram'; import { getFailedTransactionsCorrelationImpactLabel } from './get_failed_transactions_correlation_impact_label'; import { SpikeAnalysisTable } from './spike_analysis_table'; import { useSpikeAnalysisTableRowContext } from './spike_analysis_table_row_provider'; +import type { GroupTableItem } from './types'; const NARROW_COLUMN_WIDTH = '120px'; const EXPAND_COLUMN_WIDTH = '40px'; @@ -54,15 +55,6 @@ const viewInDiscoverMessage = i18n.translate( } ); -export interface GroupTableItem { - id: string; - docCount: number; - pValue: number | null; - group: FieldValuePair[]; - repeatedValues: FieldValuePair[]; - histogram: ChangePoint['histogram']; -} - interface SpikeAnalysisTableProps { changePoints: ChangePoint[]; groupTableItems: GroupTableItem[]; diff --git a/x-pack/plugins/aiops/public/components/spike_analysis_table/spike_analysis_table_row_provider.tsx b/x-pack/plugins/aiops/public/components/spike_analysis_table/spike_analysis_table_row_provider.tsx index 88b6e508d2f4cd..11f61777430ee2 100644 --- a/x-pack/plugins/aiops/public/components/spike_analysis_table/spike_analysis_table_row_provider.tsx +++ b/x-pack/plugins/aiops/public/components/spike_analysis_table/spike_analysis_table_row_provider.tsx @@ -17,7 +17,7 @@ import React, { import type { ChangePoint } from 
'@kbn/ml-agg-utils'; -import type { GroupTableItem } from './spike_analysis_table_groups'; +import type { GroupTableItem } from './types'; type ChangePointOrNull = ChangePoint | null; type GroupOrNull = GroupTableItem | null; diff --git a/x-pack/plugins/aiops/public/components/spike_analysis_table/types.ts b/x-pack/plugins/aiops/public/components/spike_analysis_table/types.ts new file mode 100644 index 00000000000000..842816f5095a62 --- /dev/null +++ b/x-pack/plugins/aiops/public/components/spike_analysis_table/types.ts @@ -0,0 +1,17 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import type { ChangePoint, FieldValuePair } from '@kbn/ml-agg-utils'; + +export interface GroupTableItem { + id: string; + docCount: number; + pValue: number | null; + group: FieldValuePair[]; + repeatedValues: FieldValuePair[]; + histogram: ChangePoint['histogram']; +} diff --git a/x-pack/plugins/aiops/public/get_document_stats.ts b/x-pack/plugins/aiops/public/get_document_stats.ts index b5960a50a4ba0e..f95e0eb2cc6108 100644 --- a/x-pack/plugins/aiops/public/get_document_stats.ts +++ b/x-pack/plugins/aiops/public/get_document_stats.ts @@ -14,7 +14,7 @@ import type { ChangePoint } from '@kbn/ml-agg-utils'; import type { Query } from '@kbn/es-query'; import { buildBaseFilterCriteria } from './application/utils/query_utils'; -import { GroupTableItem } from './components/spike_analysis_table/spike_analysis_table_groups'; +import { GroupTableItem } from './components/spike_analysis_table/types'; export interface DocumentCountStats { interval?: number; diff --git a/x-pack/plugins/aiops/public/hooks/use_data.ts b/x-pack/plugins/aiops/public/hooks/use_data.ts index 75554d5a9b96ed..73b5f79be3b4f4 100644 --- a/x-pack/plugins/aiops/public/hooks/use_data.ts +++ 
b/x-pack/plugins/aiops/public/hooks/use_data.ts @@ -27,7 +27,7 @@ import { import { useTimefilter } from './use_time_filter'; import { useDocumentCountStats } from './use_document_count_stats'; import type { Dictionary } from './use_url_state'; -import type { GroupTableItem } from '../components/spike_analysis_table/spike_analysis_table_groups'; +import type { GroupTableItem } from '../components/spike_analysis_table/types'; const DEFAULT_BAR_TARGET = 75; diff --git a/x-pack/plugins/aiops/server/routes/explain_log_rate_spikes.ts b/x-pack/plugins/aiops/server/routes/explain_log_rate_spikes.ts index 03021415410ede..dfbc313632efc9 100644 --- a/x-pack/plugins/aiops/server/routes/explain_log_rate_spikes.ts +++ b/x-pack/plugins/aiops/server/routes/explain_log_rate_spikes.ts @@ -6,7 +6,6 @@ */ import { queue } from 'async'; -import { uniqWith, isEqual } from 'lodash'; import type * as estypes from '@elastic/elasticsearch/lib/api/typesWithBodyKey'; @@ -23,7 +22,6 @@ import type { NumericHistogramField, } from '@kbn/ml-agg-utils'; import { fetchHistogramsForFields } from '@kbn/ml-agg-utils'; -import { stringHash } from '@kbn/ml-string-hash'; import { addChangePointsAction, @@ -43,22 +41,13 @@ import { API_ENDPOINT } from '../../common/api'; import { isRequestAbortedError } from '../lib/is_request_aborted_error'; import type { AiopsLicense } from '../types'; +import { duplicateIdentifier } from './queries/duplicate_identifier'; import { fetchChangePointPValues } from './queries/fetch_change_point_p_values'; import { fetchIndexInfo } from './queries/fetch_index_info'; -import { - dropDuplicates, - fetchFrequentItems, - groupDuplicates, -} from './queries/fetch_frequent_items'; -import type { ItemsetResult } from './queries/fetch_frequent_items'; +import { dropDuplicates, fetchFrequentItems } from './queries/fetch_frequent_items'; import { getHistogramQuery } from './queries/get_histogram_query'; -import { - getFieldValuePairCounts, - getSimpleHierarchicalTree, - 
getSimpleHierarchicalTreeLeaves, - markDuplicates, -} from './queries/get_simple_hierarchical_tree'; import { getGroupFilter } from './queries/get_group_filter'; +import { getChangePointGroups } from './queries/get_change_point_groups'; // 10s ping frequency to keep the stream alive. const PING_FREQUENCY = 10000; @@ -434,25 +423,9 @@ export const defineExplainLogRateSpikesRoute = ( }) ); - // To optimize the `frequent_items` query, we identify duplicate change points by count attributes. - // Note this is a compromise and not 100% accurate because there could be change points that - // have the exact same counts but still don't co-occur. - const duplicateIdentifier: Array<keyof ChangePoint> = [ - 'doc_count', - 'bg_count', - 'total_doc_count', - 'total_bg_count', - ]; - - // These are the deduplicated change points we pass to the `frequent_items` aggregation. + // Deduplicated change points we pass to the `frequent_items` aggregation. const deduplicatedChangePoints = dropDuplicates(changePoints, duplicateIdentifier); - // We use the grouped change points to later repopulate - // the `frequent_items` result with the missing duplicates. - const groupedChangePoints = groupDuplicates(changePoints, duplicateIdentifier).filter( - (g) => g.group.length > 1 - ); - try { const { fields, df } = await fetchFrequentItems( client, @@ -475,143 +448,9 @@ export const defineExplainLogRateSpikesRoute = ( } if (fields.length > 0 && df.length > 0) { - // The way the `frequent_items` aggregations works could return item sets that include - // field/value pairs that are not part of the original list of significant change points. - // This cleans up groups and removes those unrelated field/value pairs. 
- const filteredDf = df - .map((fi, fiIndex) => { - const updatedSet = Object.entries(fi.set).reduce<ItemsetResult['set']>( - (set, [field, value]) => { - if ( - changePoints.some( - (cp) => cp.fieldName === field && cp.fieldValue === value - ) - ) { - set[field] = value; - } - return set; - }, - {} - ); - - // only assign the updated reduced set if it doesn't already match - // an existing set. if there's a match just add an empty set - // so it will be filtered in the last step. - fi.set = df.some((d, dIndex) => fiIndex !== dIndex && isEqual(fi.set, d.set)) - ? {} - : updatedSet; - - fi.size = Object.keys(fi.set).length; - - return fi; - }) - .filter((fi) => fi.size > 1); - - // `frequent_items` returns lot of different small groups of field/value pairs that co-occur. - // The following steps analyse these small groups, identify overlap between these groups, - // and then summarize them in larger groups where possible. - - // Get a tree structure based on `frequent_items`. - const { root } = getSimpleHierarchicalTree(filteredDf, true, false, fields); - - // Each leave of the tree will be a summarized group of co-occuring field/value pairs. - const treeLeaves = getSimpleHierarchicalTreeLeaves(root, []); - - // To be able to display a more cleaned up results table in the UI, we identify field/value pairs - // that occur in multiple groups. This will allow us to highlight field/value pairs that are - // unique to a group in a better way. This step will also re-add duplicates we identified in the - // beginning and didn't pass on to the `frequent_items` agg. 
- const fieldValuePairCounts = getFieldValuePairCounts(treeLeaves); - const changePointGroups = markDuplicates(treeLeaves, fieldValuePairCounts).map( - (g) => { - const group = [...g.group]; - - for (const groupItem of g.group) { - const { duplicate } = groupItem; - const duplicates = groupedChangePoints.find((d) => - d.group.some( - (dg) => - dg.fieldName === groupItem.fieldName && - dg.fieldValue === groupItem.fieldValue - ) - ); - - if (duplicates !== undefined) { - group.push( - ...duplicates.group.map((d) => { - return { - fieldName: d.fieldName, - fieldValue: d.fieldValue, - duplicate, - }; - }) - ); - } - } - - return { - ...g, - group: uniqWith(group, (a, b) => isEqual(a, b)), - }; - } - ); - - // Some field/value pairs might not be part of the `frequent_items` result set, for example - // because they don't co-occur with other field/value pairs or because of the limits we set on the query. - // In this next part we identify those missing pairs and add them as individual groups. 
- const missingChangePoints = deduplicatedChangePoints.filter((cp) => { - return !changePointGroups.some((cpg) => { - return cpg.group.some( - (d) => d.fieldName === cp.fieldName && d.fieldValue === cp.fieldValue - ); - }); - }); - - changePointGroups.push( - ...missingChangePoints.map( - ({ fieldName, fieldValue, doc_count: docCount, pValue }) => { - const duplicates = groupedChangePoints.find((d) => - d.group.some( - (dg) => dg.fieldName === fieldName && dg.fieldValue === fieldValue - ) - ); - if (duplicates !== undefined) { - return { - id: `${stringHash( - JSON.stringify( - duplicates.group.map((d) => ({ - fieldName: d.fieldName, - fieldValue: d.fieldValue, - })) - ) - )}`, - group: duplicates.group.map((d) => ({ - fieldName: d.fieldName, - fieldValue: d.fieldValue, - duplicate: false, - })), - docCount, - pValue, - }; - } else { - return { - id: `${stringHash(JSON.stringify({ fieldName, fieldValue }))}`, - group: [ - { - fieldName, - fieldValue, - duplicate: false, - }, - ], - docCount, - pValue, - }; - } - } - ) - ); + const changePointGroups = getChangePointGroups(df, changePoints, fields); - // Finally, we'll find out if there's at least one group with at least two items, + // We'll find out if there's at least one group with at least two items, // only then will we return the groups to the clients and make the grouping option available. const maxItems = Math.max(...changePointGroups.map((g) => g.group.length)); diff --git a/x-pack/plugins/aiops/server/routes/queries/duplicate_identifier.ts b/x-pack/plugins/aiops/server/routes/queries/duplicate_identifier.ts new file mode 100644 index 00000000000000..d996c060f7fe96 --- /dev/null +++ b/x-pack/plugins/aiops/server/routes/queries/duplicate_identifier.ts @@ -0,0 +1,18 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. 
Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import type { ChangePoint } from '@kbn/ml-agg-utils'; + +// To optimize the `frequent_items` query, we identify duplicate change points by count attributes. +// Note this is a compromise and not 100% accurate because there could be change points that +// have the exact same counts but still don't co-occur. +export const duplicateIdentifier: Array = [ + 'doc_count', + 'bg_count', + 'total_doc_count', + 'total_bg_count', +]; diff --git a/x-pack/plugins/aiops/server/routes/queries/fetch_frequent_items.ts b/x-pack/plugins/aiops/server/routes/queries/fetch_frequent_items.ts index ff1fba16f28f24..76aeeb1eabcdbd 100644 --- a/x-pack/plugins/aiops/server/routes/queries/fetch_frequent_items.ts +++ b/x-pack/plugins/aiops/server/routes/queries/fetch_frequent_items.ts @@ -11,9 +11,11 @@ import type * as estypes from '@elastic/elasticsearch/lib/api/typesWithBodyKey'; import type { ElasticsearchClient } from '@kbn/core-elasticsearch-server'; import type { Logger } from '@kbn/logging'; -import { type ChangePoint, type FieldValuePair, RANDOM_SAMPLER_SEED } from '@kbn/ml-agg-utils'; +import { type ChangePoint, RANDOM_SAMPLER_SEED } from '@kbn/ml-agg-utils'; import { isPopulatedObject } from '@kbn/ml-is-populated-object'; +import type { ChangePointDuplicateGroup, ItemsetResult } from '../../../common/types'; + const FREQUENT_ITEMS_FIELDS_LIMIT = 15; interface FrequentItemsAggregation extends estypes.AggregationsSamplerAggregation { @@ -34,10 +36,6 @@ export function dropDuplicates(cps: ChangePoint[], uniqueFields: Array isEqual(pick(a, uniqueFields), pick(b, uniqueFields))); } -interface ChangePointDuplicateGroup { - keys: Pick; - group: ChangePoint[]; -} export function groupDuplicates(cps: ChangePoint[], uniqueFields: Array) { const groups: ChangePointDuplicateGroup[] = []; @@ -226,12 +224,3 @@ export async function fetchFrequentItems( totalDocCount: 
totalDocCountFi, }; } - -export interface ItemsetResult { - set: Record; - size: number; - maxPValue: number; - doc_count: number; - support: number; - total_doc_count: number; -} diff --git a/x-pack/plugins/aiops/server/routes/queries/get_change_point_groups.test.ts b/x-pack/plugins/aiops/server/routes/queries/get_change_point_groups.test.ts new file mode 100644 index 00000000000000..2496e9e927f0ed --- /dev/null +++ b/x-pack/plugins/aiops/server/routes/queries/get_change_point_groups.test.ts @@ -0,0 +1,21 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import { fields } from '../../../common/__mocks__/artificial_logs/fields'; +import { frequentItems } from '../../../common/__mocks__/artificial_logs/frequent_items'; +import { changePoints } from '../../../common/__mocks__/artificial_logs/change_points'; +import { finalChangePointGroups } from '../../../common/__mocks__/artificial_logs/final_change_point_groups'; + +import { getChangePointGroups } from './get_change_point_groups'; + +describe('getChangePointGroups', () => { + it('gets change point groups', () => { + const changePointGroups = getChangePointGroups(frequentItems, changePoints, fields); + + expect(changePointGroups).toEqual(finalChangePointGroups); + }); +}); diff --git a/x-pack/plugins/aiops/server/routes/queries/get_change_point_groups.ts b/x-pack/plugins/aiops/server/routes/queries/get_change_point_groups.ts new file mode 100644 index 00000000000000..fa0722b15c9a4f --- /dev/null +++ b/x-pack/plugins/aiops/server/routes/queries/get_change_point_groups.ts @@ -0,0 +1,74 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. 
Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import type { ChangePoint, ChangePointGroup } from '@kbn/ml-agg-utils'; + +import { duplicateIdentifier } from './duplicate_identifier'; +import { dropDuplicates, groupDuplicates } from './fetch_frequent_items'; +import { getFieldValuePairCounts } from './get_field_value_pair_counts'; +import { getMarkedDuplicates } from './get_marked_duplicates'; +import { getSimpleHierarchicalTree } from './get_simple_hierarchical_tree'; +import { getSimpleHierarchicalTreeLeaves } from './get_simple_hierarchical_tree_leaves'; +import { getFilteredFrequentItems } from './get_filtered_frequent_items'; +import { getGroupsWithReaddedDuplicates } from './get_groups_with_readded_duplicates'; +import { getMissingChangePoints } from './get_missing_change_points'; +import { transformChangePointToGroup } from './transform_change_point_to_group'; +import type { ItemsetResult } from '../../../common/types'; + +export function getChangePointGroups( + itemsets: ItemsetResult[], + changePoints: ChangePoint[], + fields: string[] +): ChangePointGroup[] { + // These are the deduplicated change points we pass to the `frequent_items` aggregation. + const deduplicatedChangePoints = dropDuplicates(changePoints, duplicateIdentifier); + + // We use the grouped change points to later repopulate + // the `frequent_items` result with the missing duplicates. + const groupedChangePoints = groupDuplicates(changePoints, duplicateIdentifier).filter( + (g) => g.group.length > 1 + ); + + const filteredDf = getFilteredFrequentItems(itemsets, changePoints); + + // `frequent_items` returns a lot of different small groups of field/value pairs that co-occur. + // The following steps analyse these small groups, identify overlap between these groups, + // and then summarize them in larger groups where possible. + + // Get a tree structure based on `frequent_items`.
+ const { root } = getSimpleHierarchicalTree(filteredDf, true, false, fields); + + // Each leaf of the tree will be a summarized group of co-occurring field/value pairs. + const treeLeaves = getSimpleHierarchicalTreeLeaves(root, []); + + // To be able to display a more cleaned up results table in the UI, we identify field/value pairs + // that occur in multiple groups. This will allow us to highlight field/value pairs that are + // unique to a group in a better way. This step will also re-add duplicates we identified in the + // beginning and didn't pass on to the `frequent_items` agg. + const fieldValuePairCounts = getFieldValuePairCounts(treeLeaves); + const changePointGroupsWithMarkedDuplicates = getMarkedDuplicates( + treeLeaves, + fieldValuePairCounts + ); + const changePointGroups = getGroupsWithReaddedDuplicates( + changePointGroupsWithMarkedDuplicates, + groupedChangePoints + ); + + // Some field/value pairs might not be part of the `frequent_items` result set, for example + // because they don't co-occur with other field/value pairs or because of the limits we set on the query. + // In this next part we identify those missing pairs and add them as individual groups. + const missingChangePoints = getMissingChangePoints(deduplicatedChangePoints, changePointGroups); + + changePointGroups.push( + ...missingChangePoints.map((changePoint) => + transformChangePointToGroup(changePoint, groupedChangePoints) + ) + ); + + return changePointGroups; +} diff --git a/x-pack/plugins/aiops/server/routes/queries/get_field_value_pair_counts.test.ts b/x-pack/plugins/aiops/server/routes/queries/get_field_value_pair_counts.test.ts new file mode 100644 index 00000000000000..d5b56751fa1732 --- /dev/null +++ b/x-pack/plugins/aiops/server/routes/queries/get_field_value_pair_counts.test.ts @@ -0,0 +1,50 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements.
Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import { changePointGroups } from '../../../common/__mocks__/farequote/change_point_groups'; +import { fields } from '../../../common/__mocks__/artificial_logs/fields'; +import { filteredFrequentItems } from '../../../common/__mocks__/artificial_logs/filtered_frequent_items'; + +import { getFieldValuePairCounts } from './get_field_value_pair_counts'; +import { getSimpleHierarchicalTree } from './get_simple_hierarchical_tree'; +import { getSimpleHierarchicalTreeLeaves } from './get_simple_hierarchical_tree_leaves'; + +describe('getFieldValuePairCounts', () => { + it('returns a nested record with field/value pair counts for farequote', () => { + const fieldValuePairCounts = getFieldValuePairCounts(changePointGroups); + + expect(fieldValuePairCounts).toEqual({ + airline: { + AAL: 1, + UAL: 1, + }, + 'custom_field.keyword': { + deviation: 2, + }, + }); + }); + + it('returns a nested record with field/value pair counts for artificial logs', () => { + const simpleHierarchicalTree = getSimpleHierarchicalTree( + filteredFrequentItems, + true, + false, + fields + ); + const leaves = getSimpleHierarchicalTreeLeaves(simpleHierarchicalTree.root, []); + const fieldValuePairCounts = getFieldValuePairCounts(leaves); + + expect(fieldValuePairCounts).toEqual({ + response_code: { + '500': 1, + }, + url: { + 'home.php': 1, + }, + }); + }); +}); diff --git a/x-pack/plugins/aiops/server/routes/queries/get_field_value_pair_counts.ts b/x-pack/plugins/aiops/server/routes/queries/get_field_value_pair_counts.ts new file mode 100644 index 00000000000000..1c5f5ca33d7184 --- /dev/null +++ b/x-pack/plugins/aiops/server/routes/queries/get_field_value_pair_counts.ts @@ -0,0 +1,25 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. 
Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import type { ChangePointGroup } from '@kbn/ml-agg-utils'; + +import type { FieldValuePairCounts } from '../../../common/types'; + +/** + * Get a nested record of field/value pairs with counts + */ +export function getFieldValuePairCounts(cpgs: ChangePointGroup[]): FieldValuePairCounts { + return cpgs.reduce((p, { group }) => { + group.forEach(({ fieldName, fieldValue }) => { + if (p[fieldName] === undefined) { + p[fieldName] = {}; + } + p[fieldName][fieldValue] = p[fieldName][fieldValue] ? p[fieldName][fieldValue] + 1 : 1; + }); + return p; + }, {}); +} diff --git a/x-pack/plugins/aiops/server/routes/queries/get_filtered_frequent_items.test.ts b/x-pack/plugins/aiops/server/routes/queries/get_filtered_frequent_items.test.ts new file mode 100644 index 00000000000000..8399c0366dea10 --- /dev/null +++ b/x-pack/plugins/aiops/server/routes/queries/get_filtered_frequent_items.test.ts @@ -0,0 +1,20 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +import { changePoints } from '../../../common/__mocks__/artificial_logs/change_points'; +import { frequentItems } from '../../../common/__mocks__/artificial_logs/frequent_items'; +import { filteredFrequentItems } from '../../../common/__mocks__/artificial_logs/filtered_frequent_items'; + +import { getFilteredFrequentItems } from './get_filtered_frequent_items'; + +describe('getFilteredFrequentItems', () => { + it('filter frequent item set based on provided change points', () => { + expect(getFilteredFrequentItems(frequentItems, changePoints)).toStrictEqual( + filteredFrequentItems + ); + }); +}); diff --git a/x-pack/plugins/aiops/server/routes/queries/get_filtered_frequent_items.ts b/x-pack/plugins/aiops/server/routes/queries/get_filtered_frequent_items.ts new file mode 100644 index 00000000000000..e071621f6d9f7e --- /dev/null +++ b/x-pack/plugins/aiops/server/routes/queries/get_filtered_frequent_items.ts @@ -0,0 +1,47 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import { isEqual } from 'lodash'; + +import type { ChangePoint } from '@kbn/ml-agg-utils'; + +import type { ItemsetResult } from '../../../common/types'; + +// The way the `frequent_items` aggregation works could return item sets that include +// field/value pairs that are not part of the original list of significant change points. +// This cleans up groups and removes those unrelated field/value pairs. 
+export function getFilteredFrequentItems( + itemsets: ItemsetResult[], + changePoints: ChangePoint[] +): ItemsetResult[] { + return itemsets.reduce((p, itemset, itemsetIndex) => { + // Remove field/value pairs not part of the provided change points + itemset.set = Object.entries(itemset.set).reduce( + (set, [field, value]) => { + if (changePoints.some((cp) => cp.fieldName === field && cp.fieldValue === value)) { + set[field] = value; + } + return set; + }, + {} + ); + + // Only keep the item set if its reduced set doesn't already match + // the set of another item set. If there's a match, skip the item set + // so it is not part of the returned result. + if (itemsets.some((d, dIndex) => itemsetIndex !== dIndex && isEqual(itemset.set, d.set))) { + return p; + } + + // Update the size attribute to match the possibly updated set + itemset.size = Object.keys(itemset.set).length; + + p.push(itemset); + + return p; + }, []); +} diff --git a/x-pack/plugins/aiops/server/routes/queries/get_group_filter.test.ts b/x-pack/plugins/aiops/server/routes/queries/get_group_filter.test.ts index 432450ede9b455..b2c15d70e83f93 100644 --- a/x-pack/plugins/aiops/server/routes/queries/get_group_filter.test.ts +++ b/x-pack/plugins/aiops/server/routes/queries/get_group_filter.test.ts @@ -5,30 +5,13 @@ * 2.0.
*/ -import { getGroupFilter } from './get_group_filter'; +import { finalChangePointGroups } from '../../../common/__mocks__/artificial_logs/final_change_point_groups'; -const changePointGroups = [ - { - id: '2038579476', - group: [ - { fieldName: 'response_code', fieldValue: '500', duplicate: false }, - { fieldName: 'url', fieldValue: 'home.php', duplicate: false }, - { fieldName: 'url', fieldValue: 'login.php', duplicate: false }, - ], - docCount: 792, - pValue: 0.010770456205312423, - }, - { - id: '817080373', - group: [{ fieldName: 'user', fieldValue: 'Peter', duplicate: false }], - docCount: 1981, - pValue: 2.7454255728359757e-21, - }, -]; +import { getGroupFilter } from './get_group_filter'; describe('getGroupFilter', () => { it('gets a query filter for the change points of a group with multiple values per field', () => { - expect(getGroupFilter(changePointGroups[0])).toStrictEqual([ + expect(getGroupFilter(finalChangePointGroups[0])).toStrictEqual([ { term: { response_code: '500', @@ -43,7 +26,7 @@ describe('getGroupFilter', () => { }); it('gets a query filter for the change points of a group with just a single field/value', () => { - expect(getGroupFilter(changePointGroups[1])).toStrictEqual([ + expect(getGroupFilter(finalChangePointGroups[1])).toStrictEqual([ { term: { user: 'Peter', diff --git a/x-pack/plugins/aiops/server/routes/queries/get_groups_with_readded_duplicates.test.ts b/x-pack/plugins/aiops/server/routes/queries/get_groups_with_readded_duplicates.test.ts new file mode 100644 index 00000000000000..50b5719e49fa4c --- /dev/null +++ b/x-pack/plugins/aiops/server/routes/queries/get_groups_with_readded_duplicates.test.ts @@ -0,0 +1,55 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +import { changePointGroups } from '../../../common/__mocks__/artificial_logs/change_point_groups'; +import { changePoints } from '../../../common/__mocks__/artificial_logs/change_points'; + +import { duplicateIdentifier } from './duplicate_identifier'; +import { getGroupsWithReaddedDuplicates } from './get_groups_with_readded_duplicates'; +import { groupDuplicates } from './fetch_frequent_items'; +import { getFieldValuePairCounts } from './get_field_value_pair_counts'; +import { getMarkedDuplicates } from './get_marked_duplicates'; + +describe('getGroupsWithReaddedDuplicates', () => { + it('gets groups with readded duplicates', () => { + const groupedChangePoints = groupDuplicates(changePoints, duplicateIdentifier).filter( + (g) => g.group.length > 1 + ); + + const fieldValuePairCounts = getFieldValuePairCounts(changePointGroups); + const markedDuplicates = getMarkedDuplicates(changePointGroups, fieldValuePairCounts); + const groupsWithReaddedDuplicates = getGroupsWithReaddedDuplicates( + markedDuplicates, + groupedChangePoints + ); + + expect(groupsWithReaddedDuplicates).toEqual([ + { + docCount: 792, + group: [ + { + duplicate: false, + fieldName: 'response_code', + fieldValue: '500', + }, + { + duplicate: false, + fieldName: 'url', + fieldValue: 'home.php', + }, + { + duplicate: false, + fieldName: 'url', + fieldValue: 'login.php', + }, + ], + id: '2038579476', + pValue: 0.010770456205312423, + }, + ]); + }); +}); diff --git a/x-pack/plugins/aiops/server/routes/queries/get_groups_with_readded_duplicates.ts b/x-pack/plugins/aiops/server/routes/queries/get_groups_with_readded_duplicates.ts new file mode 100644 index 00000000000000..91622c2b15419e --- /dev/null +++ b/x-pack/plugins/aiops/server/routes/queries/get_groups_with_readded_duplicates.ts @@ -0,0 +1,47 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. 
Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import { uniqWith, isEqual } from 'lodash'; + +import type { ChangePointGroup } from '@kbn/ml-agg-utils'; + +import type { ChangePointDuplicateGroup } from '../../../common/types'; + +export function getGroupsWithReaddedDuplicates( + groups: ChangePointGroup[], + groupedChangePoints: ChangePointDuplicateGroup[] +): ChangePointGroup[] { + return groups.map((g) => { + const group = [...g.group]; + + for (const groupItem of g.group) { + const { duplicate } = groupItem; + const duplicates = groupedChangePoints.find((d) => + d.group.some( + (dg) => dg.fieldName === groupItem.fieldName && dg.fieldValue === groupItem.fieldValue + ) + ); + + if (duplicates !== undefined) { + group.push( + ...duplicates.group.map((d) => { + return { + fieldName: d.fieldName, + fieldValue: d.fieldValue, + duplicate, + }; + }) + ); + } + } + + return { + ...g, + group: uniqWith(group, (a, b) => isEqual(a, b)), + }; + }); +} diff --git a/x-pack/plugins/aiops/server/routes/queries/get_marked_duplicates.test.ts b/x-pack/plugins/aiops/server/routes/queries/get_marked_duplicates.test.ts new file mode 100644 index 00000000000000..e44a26d70494cf --- /dev/null +++ b/x-pack/plugins/aiops/server/routes/queries/get_marked_duplicates.test.ts @@ -0,0 +1,91 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +import { changePointGroups } from '../../../common/__mocks__/farequote/change_point_groups'; +import { fields } from '../../../common/__mocks__/artificial_logs/fields'; +import { filteredFrequentItems } from '../../../common/__mocks__/artificial_logs/filtered_frequent_items'; + +import { getFieldValuePairCounts } from './get_field_value_pair_counts'; +import { getMarkedDuplicates } from './get_marked_duplicates'; +import { getSimpleHierarchicalTree } from './get_simple_hierarchical_tree'; +import { getSimpleHierarchicalTreeLeaves } from './get_simple_hierarchical_tree_leaves'; + +describe('markDuplicates', () => { + it('marks duplicates based on change point groups for farequote', () => { + const fieldValuePairCounts = getFieldValuePairCounts(changePointGroups); + const markedDuplicates = getMarkedDuplicates(changePointGroups, fieldValuePairCounts); + + expect(markedDuplicates).toEqual([ + { + id: 'group-1', + group: [ + { + fieldName: 'custom_field.keyword', + fieldValue: 'deviation', + duplicate: true, + }, + { + fieldName: 'airline', + fieldValue: 'UAL', + duplicate: false, + }, + ], + docCount: 101, + pValue: 0.01, + }, + { + id: 'group-2', + group: [ + { + fieldName: 'custom_field.keyword', + fieldValue: 'deviation', + duplicate: true, + }, + { + fieldName: 'airline', + fieldValue: 'AAL', + duplicate: false, + }, + ], + docCount: 49, + pValue: 0.001, + }, + ]); + }); + + it('marks duplicates based on change point groups for artificial logs', () => { + const simpleHierarchicalTree = getSimpleHierarchicalTree( + filteredFrequentItems, + true, + false, + fields + ); + const leaves = getSimpleHierarchicalTreeLeaves(simpleHierarchicalTree.root, []); + const fieldValuePairCounts = getFieldValuePairCounts(leaves); + const markedDuplicates = getMarkedDuplicates(leaves, fieldValuePairCounts); + + expect(markedDuplicates).toEqual([ + { + docCount: 792, + group: [ + { + duplicate: false, + fieldName: 'response_code', + fieldValue: '500', + }, + { + duplicate: 
false, + fieldName: 'url', + fieldValue: 'home.php', + }, + ], + id: '2038579476', + pValue: 0.010770456205312423, + }, + ]); + }); +}); diff --git a/x-pack/plugins/aiops/server/routes/queries/get_marked_duplicates.ts b/x-pack/plugins/aiops/server/routes/queries/get_marked_duplicates.ts new file mode 100644 index 00000000000000..a4ed85e8e94b69 --- /dev/null +++ b/x-pack/plugins/aiops/server/routes/queries/get_marked_duplicates.ts @@ -0,0 +1,30 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import type { ChangePointGroup } from '@kbn/ml-agg-utils'; + +import type { FieldValuePairCounts } from '../../../common/types'; + +/** + * Analyse duplicate field/value pairs in change point groups. + */ +export function getMarkedDuplicates( + cpgs: ChangePointGroup[], + fieldValuePairCounts: FieldValuePairCounts +): ChangePointGroup[] { + return cpgs.map((cpg) => { + return { + ...cpg, + group: cpg.group.map((g) => { + return { + ...g, + duplicate: fieldValuePairCounts[g.fieldName][g.fieldValue] > 1, + }; + }), + }; + }); +} diff --git a/x-pack/plugins/aiops/server/routes/queries/get_missing_change_points.test.ts b/x-pack/plugins/aiops/server/routes/queries/get_missing_change_points.test.ts new file mode 100644 index 00000000000000..477321b74e0075 --- /dev/null +++ b/x-pack/plugins/aiops/server/routes/queries/get_missing_change_points.test.ts @@ -0,0 +1,52 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +import { changePointGroups } from '../../../common/__mocks__/artificial_logs/change_point_groups'; +import { changePoints } from '../../../common/__mocks__/artificial_logs/change_points'; + +import { duplicateIdentifier } from './duplicate_identifier'; +import { getGroupsWithReaddedDuplicates } from './get_groups_with_readded_duplicates'; +import { dropDuplicates, groupDuplicates } from './fetch_frequent_items'; +import { getFieldValuePairCounts } from './get_field_value_pair_counts'; +import { getMarkedDuplicates } from './get_marked_duplicates'; +import { getMissingChangePoints } from './get_missing_change_points'; + +describe('getMissingChangePoints', () => { + it('get missing change points', () => { + const deduplicatedChangePoints = dropDuplicates(changePoints, duplicateIdentifier); + + const groupedChangePoints = groupDuplicates(changePoints, duplicateIdentifier).filter( + (g) => g.group.length > 1 + ); + + const fieldValuePairCounts = getFieldValuePairCounts(changePointGroups); + const markedDuplicates = getMarkedDuplicates(changePointGroups, fieldValuePairCounts); + const groupsWithReaddedDuplicates = getGroupsWithReaddedDuplicates( + markedDuplicates, + groupedChangePoints + ); + + const missingChangePoints = getMissingChangePoints( + deduplicatedChangePoints, + groupsWithReaddedDuplicates + ); + + expect(missingChangePoints).toEqual([ + { + bg_count: 553, + doc_count: 1981, + fieldName: 'user', + fieldValue: 'Peter', + normalizedScore: 0.8327337555873047, + pValue: 2.7454255728359757e-21, + score: 47.34435085428873, + total_bg_count: 1975, + total_doc_count: 4671, + }, + ]); + }); +}); diff --git a/x-pack/plugins/aiops/server/routes/queries/get_missing_change_points.ts b/x-pack/plugins/aiops/server/routes/queries/get_missing_change_points.ts new file mode 100644 index 00000000000000..57422ad16213f0 --- /dev/null +++ b/x-pack/plugins/aiops/server/routes/queries/get_missing_change_points.ts @@ -0,0 +1,19 @@ +/* + * Copyright Elasticsearch B.V. 
and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import type { ChangePoint, ChangePointGroup } from '@kbn/ml-agg-utils'; + +export function getMissingChangePoints( + deduplicatedChangePoints: ChangePoint[], + changePointGroups: ChangePointGroup[] +) { + return deduplicatedChangePoints.filter((cp) => { + return !changePointGroups.some((cpg) => { + return cpg.group.some((d) => d.fieldName === cp.fieldName && d.fieldValue === cp.fieldValue); + }); + }); +} diff --git a/x-pack/plugins/aiops/server/routes/queries/get_simple_hierarchical_tree.test.ts b/x-pack/plugins/aiops/server/routes/queries/get_simple_hierarchical_tree.test.ts index 5f2125a583db72..36cc113ad7be07 100644 --- a/x-pack/plugins/aiops/server/routes/queries/get_simple_hierarchical_tree.test.ts +++ b/x-pack/plugins/aiops/server/routes/queries/get_simple_hierarchical_tree.test.ts @@ -5,101 +5,50 @@ * 2.0. */ -import type { ChangePointGroup } from '@kbn/ml-agg-utils'; +import { fields } from '../../../common/__mocks__/artificial_logs/fields'; +import { filteredFrequentItems } from '../../../common/__mocks__/artificial_logs/filtered_frequent_items'; -import { getFieldValuePairCounts, markDuplicates } from './get_simple_hierarchical_tree'; +import { getSimpleHierarchicalTree } from './get_simple_hierarchical_tree'; -const changePointGroups: ChangePointGroup[] = [ - { - id: 'group-1', - group: [ - { - fieldName: 'custom_field.keyword', - fieldValue: 'deviation', +describe('getSimpleHierarchicalTree', () => { + it('returns the hierarchical tree', () => { + // stringify and again parse the tree to remove attached methods + // and make it comparable against a static representation. 
+ expect( + JSON.parse( + JSON.stringify(getSimpleHierarchicalTree(filteredFrequentItems, true, false, fields)) + ) + ).toEqual({ + root: { + name: '', + set: [], + docCount: 0, + pValue: 0, + children: [ + { + name: "792/1505 500 home.php '*'", + set: [ + { fieldName: 'response_code', fieldValue: '500' }, + { fieldName: 'url', fieldValue: 'home.php' }, + ], + docCount: 792, + pValue: 0.010770456205312423, + children: [ + { + name: "792/1505 500 home.php '*'", + set: [ + { fieldName: 'response_code', fieldValue: '500' }, + { fieldName: 'url', fieldValue: 'home.php' }, + ], + docCount: 792, + pValue: 0.010770456205312423, + children: [], + }, + ], + }, + ], }, - { - fieldName: 'airline', - fieldValue: 'UAL', - }, - ], - docCount: 101, - pValue: 0.01, - }, - { - id: 'group-2', - group: [ - { - fieldName: 'custom_field.keyword', - fieldValue: 'deviation', - }, - { - fieldName: 'airline', - fieldValue: 'AAL', - }, - ], - docCount: 49, - pValue: 0.001, - }, -]; - -describe('get_simple_hierarchical_tree', () => { - describe('getFieldValuePairCounts', () => { - it('returns a nested record with field/value pair counts', () => { - const fieldValuePairCounts = getFieldValuePairCounts(changePointGroups); - - expect(fieldValuePairCounts).toEqual({ - airline: { - AAL: 1, - UAL: 1, - }, - 'custom_field.keyword': { - deviation: 2, - }, - }); - }); - }); - - describe('markDuplicates', () => { - it('marks duplicates based on change point groups', () => { - const fieldValuePairCounts = getFieldValuePairCounts(changePointGroups); - const markedDuplicates = markDuplicates(changePointGroups, fieldValuePairCounts); - - expect(markedDuplicates).toEqual([ - { - id: 'group-1', - group: [ - { - fieldName: 'custom_field.keyword', - fieldValue: 'deviation', - duplicate: true, - }, - { - fieldName: 'airline', - fieldValue: 'UAL', - duplicate: false, - }, - ], - docCount: 101, - pValue: 0.01, - }, - { - id: 'group-2', - group: [ - { - fieldName: 'custom_field.keyword', - fieldValue: 
'deviation', - duplicate: true, - }, - { - fieldName: 'airline', - fieldValue: 'AAL', - duplicate: false, - }, - ], - docCount: 49, - pValue: 0.001, - }, - ]); + fields: ['response_code', 'url', 'user'], }); }); }); diff --git a/x-pack/plugins/aiops/server/routes/queries/get_simple_hierarchical_tree.ts b/x-pack/plugins/aiops/server/routes/queries/get_simple_hierarchical_tree.ts index 9f39d1eb11f681..41c014e27af440 100644 --- a/x-pack/plugins/aiops/server/routes/queries/get_simple_hierarchical_tree.ts +++ b/x-pack/plugins/aiops/server/routes/queries/get_simple_hierarchical_tree.ts @@ -5,47 +5,15 @@ * 2.0. */ -// import { omit, uniq } from 'lodash'; +import type { ItemsetResult, SimpleHierarchicalTreeNode } from '../../../common/types'; -import type { ChangePointGroup, FieldValuePair } from '@kbn/ml-agg-utils'; -import { stringHash } from '@kbn/ml-string-hash'; +import { getValueCounts } from './get_value_counts'; +import { getValuesDescending } from './get_values_descending'; -import type { ItemsetResult } from './fetch_frequent_items'; +function NewNodeFactory(name: string): SimpleHierarchicalTreeNode { + const children: SimpleHierarchicalTreeNode[] = []; -function getValueCounts(df: ItemsetResult[], field: string) { - return df.reduce<Record<string, number>>((p, c) => { - if (c.set[field] === undefined) { - return p; - } - p[c.set[field]] = p[c.set[field]] ? 
p[c.set[field]] + 1 : 1; - return p; - }, {}); -} - -function getValuesDescending(df: ItemsetResult[], field: string): string[] { - const valueCounts = getValueCounts(df, field); - const keys = Object.keys(valueCounts); - - return keys.sort((a, b) => { - return valueCounts[b] - valueCounts[a]; - }); -} - -interface NewNode { - name: string; - set: FieldValuePair[]; - docCount: number; - pValue: number | null; - children: NewNode[]; - icon: string; - iconStyle: string; - addNode: (node: NewNode) => void; -} - -function NewNodeFactory(name: string): NewNode { - const children: NewNode[] = []; - - const addNode = (node: NewNode) => { + const addNode = (node: SimpleHierarchicalTreeNode) => { children.push(node); }; @@ -55,19 +23,15 @@ function NewNodeFactory(name: string): NewNode { docCount: 0, pValue: 0, children, - icon: 'default', - iconStyle: 'default', addNode, }; } /** - * Simple (poorly implemented) function that constructs a tree from an itemset DataFrame sorted by support (count) + * Simple function that constructs a tree from an itemset DataFrame sorted by support (count) * The resulting tree components are non-overlapping subsets of the data. * In summary, we start with the most inclusive itemset (highest count), and perform a depth first search in field order. 
* - * TODO - the code style here is hacky and should be re-written - * * @param displayParent * @param parentDocCount * @param parentLabel @@ -80,7 +44,7 @@ function NewNodeFactory(name: string): NewNode { */ function dfDepthFirstSearch( fields: string[], - displayParent: NewNode, + displayParent: SimpleHierarchicalTreeNode, parentDocCount: number, parentLabel: string, field: string, @@ -108,7 +72,7 @@ function dfDepthFirstSearch( let label = `${parentLabel} ${value}`; - let displayNode: NewNode; + let displayNode: SimpleHierarchicalTreeNode; if (parentDocCount === docCount && collapseRedundant) { // collapse identical paths displayParent.name += ` ${value}`; @@ -118,7 +82,6 @@ function dfDepthFirstSearch( displayNode = displayParent; } else { displayNode = NewNodeFactory(`${docCount}/${totalDocCount}${label}`); - displayNode.iconStyle = 'warning'; displayNode.set = [...displayParent.set]; displayNode.set.push({ fieldName: field, fieldValue: value }); displayNode.docCount = docCount; @@ -130,8 +93,6 @@ function dfDepthFirstSearch( while (true) { const nextFieldIndex = fields.indexOf(field) + 1; if (nextFieldIndex >= fields.length) { - displayNode.icon = 'file'; - displayNode.iconStyle = 'info'; return docCount; } nextField = fields[nextFieldIndex]; @@ -147,7 +108,6 @@ function dfDepthFirstSearch( displayNode.name += ` '*'`; label += ` '*'`; const nextDisplayNode = NewNodeFactory(`${docCount}/${totalDocCount}${label}`); - nextDisplayNode.iconStyle = 'warning'; nextDisplayNode.set = displayNode.set; nextDisplayNode.docCount = docCount; nextDisplayNode.pValue = pValue; @@ -194,12 +154,6 @@ export function getSimpleHierarchicalTree( displayOther: boolean, fields: string[] = [] ) { - // const candidates = uniq( - // df.flatMap((d) => - // Object.keys(omit(d, ['size', 'maxPValue', 'doc_count', 'support', 'total_doc_count'])) - // ) - // ); - const field = fields[0]; const totalDocCount = Math.max(...df.map((d) => d.total_doc_count)); @@ -222,70 +176,3 @@ export function 
getSimpleHierarchicalTree( return { root: newRoot, fields }; } - -/** - * Get leaves from hierarchical tree. - */ -export function getSimpleHierarchicalTreeLeaves( - tree: NewNode, - leaves: ChangePointGroup[], - level = 1 -) { - if (tree.children.length === 0) { - leaves.push({ - id: `${stringHash(JSON.stringify(tree.set))}`, - group: tree.set, - docCount: tree.docCount, - pValue: tree.pValue, - }); - } else { - for (const child of tree.children) { - const newLeaves = getSimpleHierarchicalTreeLeaves(child, [], level + 1); - if (newLeaves.length > 0) { - leaves.push(...newLeaves); - } - } - } - - if (leaves.length === 1 && leaves[0].group.length === 0 && leaves[0].docCount === 0) { - return []; - } - - return leaves; -} - -type FieldValuePairCounts = Record<string, Record<string, number>>; -/** - * Get a nested record of field/value pairs with counts - */ -export function getFieldValuePairCounts(cpgs: ChangePointGroup[]): FieldValuePairCounts { - return cpgs.reduce<FieldValuePairCounts>((p, { group }) => { - group.forEach(({ fieldName, fieldValue }) => { - if (p[fieldName] === undefined) { - p[fieldName] = {}; - } - p[fieldName][fieldValue] = p[fieldName][fieldValue] ? p[fieldName][fieldValue] + 1 : 1; - }); - return p; - }, {}); -} - -/** - * Analyse duplicate field/value pairs in change point groups. 
- */ -export function markDuplicates( - cpgs: ChangePointGroup[], - fieldValuePairCounts: FieldValuePairCounts -): ChangePointGroup[] { - return cpgs.map((cpg) => { - return { - ...cpg, - group: cpg.group.map((g) => { - return { - ...g, - duplicate: fieldValuePairCounts[g.fieldName][g.fieldValue] > 1, - }; - }), - }; - }); -} diff --git a/x-pack/plugins/aiops/server/routes/queries/get_simple_hierarchical_tree_leaves.test.ts b/x-pack/plugins/aiops/server/routes/queries/get_simple_hierarchical_tree_leaves.test.ts new file mode 100644 index 00000000000000..9567d38f3d402a --- /dev/null +++ b/x-pack/plugins/aiops/server/routes/queries/get_simple_hierarchical_tree_leaves.test.ts @@ -0,0 +1,35 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import { fields } from '../../../common/__mocks__/artificial_logs/fields'; +import { filteredFrequentItems } from '../../../common/__mocks__/artificial_logs/filtered_frequent_items'; + +import { getSimpleHierarchicalTree } from './get_simple_hierarchical_tree'; +import { getSimpleHierarchicalTreeLeaves } from './get_simple_hierarchical_tree_leaves'; + +describe('getSimpleHierarchicalTreeLeaves', () => { + it('returns the hierarchical tree leaves', () => { + const simpleHierarchicalTree = getSimpleHierarchicalTree( + filteredFrequentItems, + true, + false, + fields + ); + const leaves = getSimpleHierarchicalTreeLeaves(simpleHierarchicalTree.root, []); + expect(leaves).toEqual([ + { + id: '2038579476', + group: [ + { fieldName: 'response_code', fieldValue: '500' }, + { fieldName: 'url', fieldValue: 'home.php' }, + ], + docCount: 792, + pValue: 0.010770456205312423, + }, + ]); + }); +}); diff --git a/x-pack/plugins/aiops/server/routes/queries/get_simple_hierarchical_tree_leaves.ts 
b/x-pack/plugins/aiops/server/routes/queries/get_simple_hierarchical_tree_leaves.ts new file mode 100644 index 00000000000000..699c6e447c4de0 --- /dev/null +++ b/x-pack/plugins/aiops/server/routes/queries/get_simple_hierarchical_tree_leaves.ts @@ -0,0 +1,42 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import type { ChangePointGroup } from '@kbn/ml-agg-utils'; +import { stringHash } from '@kbn/ml-string-hash'; + +import type { SimpleHierarchicalTreeNode } from '../../../common/types'; + +/** + * Get leaves from hierarchical tree. + */ +export function getSimpleHierarchicalTreeLeaves( + tree: SimpleHierarchicalTreeNode, + leaves: ChangePointGroup[], + level = 1 +) { + if (tree.children.length === 0) { + leaves.push({ + id: `${stringHash(JSON.stringify(tree.set))}`, + group: tree.set, + docCount: tree.docCount, + pValue: tree.pValue, + }); + } else { + for (const child of tree.children) { + const newLeaves = getSimpleHierarchicalTreeLeaves(child, [], level + 1); + if (newLeaves.length > 0) { + leaves.push(...newLeaves); + } + } + } + + if (leaves.length === 1 && leaves[0].group.length === 0 && leaves[0].docCount === 0) { + return []; + } + + return leaves; +} diff --git a/x-pack/plugins/aiops/server/routes/queries/get_value_counts.test.ts b/x-pack/plugins/aiops/server/routes/queries/get_value_counts.test.ts new file mode 100644 index 00000000000000..744179c485caae --- /dev/null +++ b/x-pack/plugins/aiops/server/routes/queries/get_value_counts.test.ts @@ -0,0 +1,31 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +import { frequentItems } from '../../../common/__mocks__/artificial_logs/frequent_items'; +import { getValueCounts } from './get_value_counts'; + +describe('getValueCounts', () => { + it('get value counts for field response_code', () => { + expect(getValueCounts(frequentItems, 'response_code')).toEqual({ + '200': 1, + '404': 1, + '500': 3, + }); + }); + + it('get value counts for field url', () => { + expect(getValueCounts(frequentItems, 'url')).toEqual({ 'home.php': 6 }); + }); + + it('get value counts for field user', () => { + expect(getValueCounts(frequentItems, 'user')).toEqual({ + Mary: 1, + Paul: 1, + Peter: 3, + }); + }); +}); diff --git a/x-pack/plugins/aiops/server/routes/queries/get_value_counts.ts b/x-pack/plugins/aiops/server/routes/queries/get_value_counts.ts new file mode 100644 index 00000000000000..b287d49494d788 --- /dev/null +++ b/x-pack/plugins/aiops/server/routes/queries/get_value_counts.ts @@ -0,0 +1,18 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import type { ItemsetResult } from '../../../common/types'; + +export function getValueCounts(df: ItemsetResult[], field: string) { + return df.reduce<Record<string, number>>((p, c) => { + if (c.set[field] === undefined) { + return p; + } + p[c.set[field]] = p[c.set[field]] ? p[c.set[field]] + 1 : 1; + return p; + }, {}); +} diff --git a/x-pack/plugins/aiops/server/routes/queries/get_values_descending.test.ts b/x-pack/plugins/aiops/server/routes/queries/get_values_descending.test.ts new file mode 100644 index 00000000000000..cd4935b4fcc8fd --- /dev/null +++ b/x-pack/plugins/aiops/server/routes/queries/get_values_descending.test.ts @@ -0,0 +1,23 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. 
Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import { frequentItems } from '../../../common/__mocks__/artificial_logs/frequent_items'; +import { getValuesDescending } from './get_values_descending'; + +describe('getValuesDescending', () => { + it('get descending values for field response_code', () => { + expect(getValuesDescending(frequentItems, 'response_code')).toEqual(['500', '200', '404']); + }); + + it('get descending values for field url', () => { + expect(getValuesDescending(frequentItems, 'url')).toEqual(['home.php']); + }); + + it('get descending values for field user', () => { + expect(getValuesDescending(frequentItems, 'user')).toEqual(['Peter', 'Mary', 'Paul']); + }); +}); diff --git a/x-pack/plugins/aiops/server/routes/queries/get_values_descending.ts b/x-pack/plugins/aiops/server/routes/queries/get_values_descending.ts new file mode 100644 index 00000000000000..8429ca4fcae75a --- /dev/null +++ b/x-pack/plugins/aiops/server/routes/queries/get_values_descending.ts @@ -0,0 +1,19 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +import type { ItemsetResult } from '../../../common/types'; + +import { getValueCounts } from './get_value_counts'; + +export function getValuesDescending(df: ItemsetResult[], field: string): string[] { + const valueCounts = getValueCounts(df, field); + const keys = Object.keys(valueCounts); + + return keys.sort((a, b) => { + return valueCounts[b] - valueCounts[a]; + }); +} diff --git a/x-pack/plugins/aiops/server/routes/queries/transform_change_point_to_group.test.ts b/x-pack/plugins/aiops/server/routes/queries/transform_change_point_to_group.test.ts new file mode 100644 index 00000000000000..448f3003fc924c --- /dev/null +++ b/x-pack/plugins/aiops/server/routes/queries/transform_change_point_to_group.test.ts @@ -0,0 +1,48 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import { changePointGroups } from '../../../common/__mocks__/artificial_logs/change_point_groups'; +import { changePoints } from '../../../common/__mocks__/artificial_logs/change_points'; + +import { duplicateIdentifier } from './duplicate_identifier'; +import { getGroupsWithReaddedDuplicates } from './get_groups_with_readded_duplicates'; +import { dropDuplicates, groupDuplicates } from './fetch_frequent_items'; +import { getFieldValuePairCounts } from './get_field_value_pair_counts'; +import { getMarkedDuplicates } from './get_marked_duplicates'; +import { getMissingChangePoints } from './get_missing_change_points'; +import { transformChangePointToGroup } from './transform_change_point_to_group'; + +describe('transformChangePointToGroup', () => { + it('transforms a change point into a group', () => { + const deduplicatedChangePoints = dropDuplicates(changePoints, duplicateIdentifier); + + const groupedChangePoints = groupDuplicates(changePoints, duplicateIdentifier).filter( + (g) => 
g.group.length > 1 + ); + + const fieldValuePairCounts = getFieldValuePairCounts(changePointGroups); + const markedDuplicates = getMarkedDuplicates(changePointGroups, fieldValuePairCounts); + const groupsWithReaddedDuplicates = getGroupsWithReaddedDuplicates( + markedDuplicates, + groupedChangePoints + ); + + const missingChangePoints = getMissingChangePoints( + deduplicatedChangePoints, + groupsWithReaddedDuplicates + ); + + const transformed = transformChangePointToGroup(missingChangePoints[0], groupedChangePoints); + + expect(transformed).toEqual({ + docCount: 1981, + group: [{ duplicate: false, fieldName: 'user', fieldValue: 'Peter' }], + id: '817080373', + pValue: 2.7454255728359757e-21, + }); + }); +}); diff --git a/x-pack/plugins/aiops/server/routes/queries/transform_change_point_to_group.ts b/x-pack/plugins/aiops/server/routes/queries/transform_change_point_to_group.ts new file mode 100644 index 00000000000000..8e6c77971dcee4 --- /dev/null +++ b/x-pack/plugins/aiops/server/routes/queries/transform_change_point_to_group.ts @@ -0,0 +1,55 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +import { stringHash } from '@kbn/ml-string-hash'; +import type { ChangePoint } from '@kbn/ml-agg-utils'; + +import type { ChangePointDuplicateGroup } from '../../../common/types'; + +export function transformChangePointToGroup( + changePoint: ChangePoint, + groupedChangePoints: ChangePointDuplicateGroup[] +) { + const { fieldName, fieldValue, doc_count: docCount, pValue } = changePoint; + + const duplicates = groupedChangePoints.find((d) => + d.group.some((dg) => dg.fieldName === fieldName && dg.fieldValue === fieldValue) + ); + + if (duplicates !== undefined) { + return { + id: `${stringHash( + JSON.stringify( + duplicates.group.map((d) => ({ + fieldName: d.fieldName, + fieldValue: d.fieldValue, + })) + ) + )}`, + group: duplicates.group.map((d) => ({ + fieldName: d.fieldName, + fieldValue: d.fieldValue, + duplicate: false, + })), + docCount, + pValue, + }; + } else { + return { + id: `${stringHash(JSON.stringify({ fieldName, fieldValue }))}`, + group: [ + { + fieldName, + fieldValue, + duplicate: false, + }, + ], + docCount, + pValue, + }; + } +} diff --git a/x-pack/test/api_integration/apis/aiops/test_data.ts b/x-pack/test/api_integration/apis/aiops/test_data.ts index 8503adc74a250c..3be75f1e875a8d 100644 --- a/x-pack/test/api_integration/apis/aiops/test_data.ts +++ b/x-pack/test/api_integration/apis/aiops/test_data.ts @@ -5,6 +5,12 @@ * 2.0. */ +// We use the jest unit test mocks as the expected data for the integration tests here. +// If the integration test assertions ever need to be updated, this ensures +// that the jest unit tests do not keep using outdated mocks. 
+import { changePoints as artificialLogChangePoints } from '@kbn/aiops-plugin/common/__mocks__/artificial_logs/change_points'; +import { finalChangePointGroups as artificialLogsChangePointGroups } from '@kbn/aiops-plugin/common/__mocks__/artificial_logs/final_change_point_groups'; + import type { TestData } from './types'; export const explainLogRateSpikesTestData: TestData[] = [ @@ -86,70 +92,8 @@ export const explainLogRateSpikesTestData: TestData[] = [ groupHistogramFilter: 'add_change_point_group_histogram', histogramFilter: 'add_change_points_histogram', errorFilter: 'add_error', - changePoints: [ - { - fieldName: 'response_code', - fieldValue: '500', - doc_count: 1821, - bg_count: 553, - total_doc_count: 4671, - total_bg_count: 1975, - score: 26.546201745993947, - pValue: 2.9589053032077285e-12, - normalizedScore: 0.7814127409489161, - }, - { - fieldName: 'url', - fieldValue: 'home.php', - doc_count: 1742, - bg_count: 632, - total_doc_count: 4671, - total_bg_count: 1975, - score: 4.53094842981472, - pValue: 0.010770456205312423, - normalizedScore: 0.10333028878375965, - }, - { - fieldName: 'url', - fieldValue: 'login.php', - doc_count: 1742, - bg_count: 632, - total_doc_count: 4671, - total_bg_count: 1975, - score: 4.53094842981472, - pValue: 0.010770456205312423, - normalizedScore: 0.10333028878375965, - }, - { - fieldName: 'user', - fieldValue: 'Peter', - doc_count: 1981, - bg_count: 553, - total_doc_count: 4671, - total_bg_count: 1975, - score: 47.34435085428873, - pValue: 2.7454255728359757e-21, - normalizedScore: 0.8327337555873047, - }, - ], - groups: [ - { - id: '2038579476', - group: [ - { fieldName: 'response_code', fieldValue: '500', duplicate: false }, - { fieldName: 'url', fieldValue: 'home.php', duplicate: false }, - { fieldName: 'url', fieldValue: 'login.php', duplicate: false }, - ], - docCount: 792, - pValue: 0.010770456205312423, - }, - { - id: '817080373', - group: [{ fieldName: 'user', fieldValue: 'Peter', duplicate: false }], - docCount: 
1981, - pValue: 2.7454255728359757e-21, - }, - ], + changePoints: artificialLogChangePoints, + groups: artificialLogsChangePointGroups, histogramLength: 20, }, },