diff --git a/x-pack/plugins/aiops/common/__mocks__/artificial_logs/change_point_groups.ts b/x-pack/plugins/aiops/common/__mocks__/artificial_logs/change_point_groups.ts new file mode 100644 index 00000000000000..9b580ac36ce262 --- /dev/null +++ b/x-pack/plugins/aiops/common/__mocks__/artificial_logs/change_point_groups.ts @@ -0,0 +1,20 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import type { ChangePointGroup } from '@kbn/ml-agg-utils'; + +export const changePointGroups: ChangePointGroup[] = [ + { + id: '2038579476', + group: [ + { fieldName: 'response_code', fieldValue: '500' }, + { fieldName: 'url', fieldValue: 'home.php' }, + ], + docCount: 792, + pValue: 0.010770456205312423, + }, +]; diff --git a/x-pack/plugins/aiops/common/__mocks__/artificial_logs/change_points.ts b/x-pack/plugins/aiops/common/__mocks__/artificial_logs/change_points.ts new file mode 100644 index 00000000000000..5b4597a2ecd8a0 --- /dev/null +++ b/x-pack/plugins/aiops/common/__mocks__/artificial_logs/change_points.ts @@ -0,0 +1,53 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +export const changePoints = [ + { + fieldName: 'response_code', + fieldValue: '500', + doc_count: 1821, + bg_count: 553, + total_doc_count: 4671, + total_bg_count: 1975, + score: 26.546201745993947, + pValue: 2.9589053032077285e-12, + normalizedScore: 0.7814127409489161, + }, + { + fieldName: 'url', + fieldValue: 'home.php', + doc_count: 1742, + bg_count: 632, + total_doc_count: 4671, + total_bg_count: 1975, + score: 4.53094842981472, + pValue: 0.010770456205312423, + normalizedScore: 0.10333028878375965, + }, + { + fieldName: 'url', + fieldValue: 'login.php', + doc_count: 1742, + bg_count: 632, + total_doc_count: 4671, + total_bg_count: 1975, + score: 4.53094842981472, + pValue: 0.010770456205312423, + normalizedScore: 0.10333028878375965, + }, + { + fieldName: 'user', + fieldValue: 'Peter', + doc_count: 1981, + bg_count: 553, + total_doc_count: 4671, + total_bg_count: 1975, + score: 47.34435085428873, + pValue: 2.7454255728359757e-21, + normalizedScore: 0.8327337555873047, + }, +]; diff --git a/x-pack/plugins/aiops/common/__mocks__/artificial_logs/fields.ts b/x-pack/plugins/aiops/common/__mocks__/artificial_logs/fields.ts new file mode 100644 index 00000000000000..bb42e8a6048e7b --- /dev/null +++ b/x-pack/plugins/aiops/common/__mocks__/artificial_logs/fields.ts @@ -0,0 +1,8 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +export const fields = ['response_code', 'url', 'user']; diff --git a/x-pack/plugins/aiops/common/__mocks__/artificial_logs/filtered_frequent_items.ts b/x-pack/plugins/aiops/common/__mocks__/artificial_logs/filtered_frequent_items.ts new file mode 100644 index 00000000000000..268516f95542df --- /dev/null +++ b/x-pack/plugins/aiops/common/__mocks__/artificial_logs/filtered_frequent_items.ts @@ -0,0 +1,27 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import type { ItemsetResult } from '../../types'; + +export const filteredFrequentItems: ItemsetResult[] = [ + { + set: { response_code: '500', url: 'home.php' }, + size: 2, + maxPValue: 0.010770456205312423, + doc_count: 792, + support: 0.5262458471760797, + total_doc_count: 1505, + }, + { + set: { user: 'Peter', url: 'home.php' }, + size: 2, + maxPValue: 0.010770456205312423, + doc_count: 634, + support: 0.4212624584717608, + total_doc_count: 1505, + }, +]; diff --git a/x-pack/plugins/aiops/common/__mocks__/artificial_logs/final_change_point_groups.ts b/x-pack/plugins/aiops/common/__mocks__/artificial_logs/final_change_point_groups.ts new file mode 100644 index 00000000000000..dce10c1280985c --- /dev/null +++ b/x-pack/plugins/aiops/common/__mocks__/artificial_logs/final_change_point_groups.ts @@ -0,0 +1,27 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +import type { ChangePointGroup } from '@kbn/ml-agg-utils'; + +export const finalChangePointGroups: ChangePointGroup[] = [ + { + id: '2038579476', + group: [ + { fieldName: 'response_code', fieldValue: '500', duplicate: false }, + { fieldName: 'url', fieldValue: 'home.php', duplicate: false }, + { fieldName: 'url', fieldValue: 'login.php', duplicate: false }, + ], + docCount: 792, + pValue: 0.010770456205312423, + }, + { + id: '817080373', + group: [{ fieldName: 'user', fieldValue: 'Peter', duplicate: false }], + docCount: 1981, + pValue: 2.7454255728359757e-21, + }, +]; diff --git a/x-pack/plugins/aiops/common/__mocks__/artificial_logs/frequent_items.ts b/x-pack/plugins/aiops/common/__mocks__/artificial_logs/frequent_items.ts new file mode 100644 index 00000000000000..fe61a60a1afbe5 --- /dev/null +++ b/x-pack/plugins/aiops/common/__mocks__/artificial_logs/frequent_items.ts @@ -0,0 +1,59 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +import type { ItemsetResult } from '../../types'; + +export const frequentItems: ItemsetResult[] = [ + { + set: { response_code: '500', url: 'home.php' }, + size: 2, + maxPValue: 0.010770456205312423, + doc_count: 792, + support: 0.5262458471760797, + total_doc_count: 1505, + }, + { + set: { user: 'Peter', url: 'home.php' }, + size: 2, + maxPValue: 0.010770456205312423, + doc_count: 634, + support: 0.4212624584717608, + total_doc_count: 1505, + }, + { + set: { response_code: '500', user: 'Mary', url: 'home.php' }, + size: 3, + maxPValue: 0.010770456205312423, + doc_count: 396, + support: 0.26312292358803985, + total_doc_count: 1505, + }, + { + set: { response_code: '500', user: 'Paul', url: 'home.php' }, + size: 3, + maxPValue: 0.010770456205312423, + doc_count: 396, + support: 0.26312292358803985, + total_doc_count: 1505, + }, + { + set: { response_code: '404', user: 'Peter', url: 'home.php' }, + size: 3, + maxPValue: 0.010770456205312423, + doc_count: 317, + support: 0.2106312292358804, + total_doc_count: 1505, + }, + { + set: { response_code: '200', user: 'Peter', url: 'home.php' }, + size: 3, + maxPValue: 0.010770456205312423, + doc_count: 317, + support: 0.2106312292358804, + total_doc_count: 1505, + }, +]; diff --git a/x-pack/plugins/aiops/common/__mocks__/farequote/change_point_groups.ts b/x-pack/plugins/aiops/common/__mocks__/farequote/change_point_groups.ts new file mode 100644 index 00000000000000..b30303e384f848 --- /dev/null +++ b/x-pack/plugins/aiops/common/__mocks__/farequote/change_point_groups.ts @@ -0,0 +1,41 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +import type { ChangePointGroup } from '@kbn/ml-agg-utils'; + +export const changePointGroups: ChangePointGroup[] = [ + { + id: 'group-1', + group: [ + { + fieldName: 'custom_field.keyword', + fieldValue: 'deviation', + }, + { + fieldName: 'airline', + fieldValue: 'UAL', + }, + ], + docCount: 101, + pValue: 0.01, + }, + { + id: 'group-2', + group: [ + { + fieldName: 'custom_field.keyword', + fieldValue: 'deviation', + }, + { + fieldName: 'airline', + fieldValue: 'AAL', + }, + ], + docCount: 49, + pValue: 0.001, + }, +]; diff --git a/x-pack/plugins/aiops/common/types.ts b/x-pack/plugins/aiops/common/types.ts new file mode 100644 index 00000000000000..0acb3d07883ad2 --- /dev/null +++ b/x-pack/plugins/aiops/common/types.ts @@ -0,0 +1,33 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +import type { ChangePoint, FieldValuePair } from '@kbn/ml-agg-utils'; + +export interface ChangePointDuplicateGroup { + keys: Pick<ChangePoint, keyof ChangePoint>; + group: ChangePoint[]; +} + +export type FieldValuePairCounts = Record<string, Record<string, number>>; + +export interface ItemsetResult { + set: Record<FieldValuePair['fieldName'], FieldValuePair['fieldValue']>; + size: number; + maxPValue: number; + doc_count: number; + support: number; + total_doc_count: number; +} + +export interface SimpleHierarchicalTreeNode { + name: string; + set: FieldValuePair[]; + docCount: number; + pValue: number | null; + children: SimpleHierarchicalTreeNode[]; + addNode: (node: SimpleHierarchicalTreeNode) => void; +} diff --git a/x-pack/plugins/aiops/public/application/utils/query_utils.test.ts b/x-pack/plugins/aiops/public/application/utils/query_utils.test.ts index c886b16fa0ec2f..7a2650f02d3a96 100644 --- a/x-pack/plugins/aiops/public/application/utils/query_utils.test.ts +++ b/x-pack/plugins/aiops/public/application/utils/query_utils.test.ts @@ -7,7 +7,7 @@ import type { ChangePoint } from '@kbn/ml-agg-utils'; -import type { GroupTableItem } from '../../components/spike_analysis_table/spike_analysis_table_groups'; +import type { GroupTableItem } from '../../components/spike_analysis_table/types'; import { buildBaseFilterCriteria } from './query_utils'; diff --git a/x-pack/plugins/aiops/public/application/utils/query_utils.ts b/x-pack/plugins/aiops/public/application/utils/query_utils.ts index 0c0363d852bc96..94f5bc4d9f70b2 100644 --- a/x-pack/plugins/aiops/public/application/utils/query_utils.ts +++ b/x-pack/plugins/aiops/public/application/utils/query_utils.ts @@ -16,7 +16,7 @@ import type * as estypes from '@elastic/elasticsearch/lib/api/typesWithBodyKey'; import { Query } from '@kbn/es-query'; import { isPopulatedObject } from '@kbn/ml-is-populated-object'; import type { ChangePoint, FieldValuePair } from '@kbn/ml-agg-utils'; -import type { GroupTableItem } from '../../components/spike_analysis_table/spike_analysis_table_groups'; +import type { GroupTableItem } from
'../../components/spike_analysis_table/types'; /* * Contains utility functions for building and processing queries. diff --git a/x-pack/plugins/aiops/public/components/explain_log_rate_spikes/explain_log_rate_spikes_analysis.tsx b/x-pack/plugins/aiops/public/components/explain_log_rate_spikes/explain_log_rate_spikes_analysis.tsx index e84f50b02711c3..b458e1da73516d 100644 --- a/x-pack/plugins/aiops/public/components/explain_log_rate_spikes/explain_log_rate_spikes_analysis.tsx +++ b/x-pack/plugins/aiops/public/components/explain_log_rate_spikes/explain_log_rate_spikes_analysis.tsx @@ -25,14 +25,17 @@ import type { WindowParameters } from '@kbn/aiops-utils'; import { i18n } from '@kbn/i18n'; import { FormattedMessage } from '@kbn/i18n-react'; import type { Query } from '@kbn/es-query'; -import type { FieldValuePair } from '@kbn/ml-agg-utils'; import { useAiopsAppContext } from '../../hooks/use_aiops_app_context'; import { initialState, streamReducer } from '../../../common/api/stream_reducer'; import type { ApiExplainLogRateSpikes } from '../../../common/api'; -import { SpikeAnalysisGroupsTable } from '../spike_analysis_table'; -import { SpikeAnalysisTable } from '../spike_analysis_table'; +import { + getGroupTableItems, + SpikeAnalysisTable, + SpikeAnalysisGroupsTable, +} from '../spike_analysis_table'; +import {} from '../spike_analysis_table'; import { useSpikeAnalysisTableRowContext } from '../spike_analysis_table/spike_analysis_table_row_provider'; const groupResultsMessage = i18n.translate( @@ -159,35 +162,10 @@ export const ExplainLogRateSpikesAnalysis: FC // eslint-disable-next-line react-hooks/exhaustive-deps }, []); - const groupTableItems = useMemo(() => { - const tableItems = data.changePointsGroups.map(({ id, group, docCount, histogram, pValue }) => { - const sortedGroup = group.sort((a, b) => - a.fieldName > b.fieldName ? 1 : b.fieldName > a.fieldName ? 
-1 : 0 - ); - const dedupedGroup: FieldValuePair[] = []; - const repeatedValues: FieldValuePair[] = []; - - sortedGroup.forEach((pair) => { - const { fieldName, fieldValue } = pair; - if (pair.duplicate === false) { - dedupedGroup.push({ fieldName, fieldValue }); - } else { - repeatedValues.push({ fieldName, fieldValue }); - } - }); - - return { - id, - docCount, - pValue, - group: dedupedGroup, - repeatedValues, - histogram, - }; - }); - - return tableItems; - }, [data.changePointsGroups]); + const groupTableItems = useMemo( + () => getGroupTableItems(data.changePointsGroups), + [data.changePointsGroups] + ); const shouldRerunAnalysis = useMemo( () => diff --git a/x-pack/plugins/aiops/public/components/explain_log_rate_spikes/explain_log_rate_spikes_page.tsx b/x-pack/plugins/aiops/public/components/explain_log_rate_spikes/explain_log_rate_spikes_page.tsx index 6595a9f68db94d..a6327331a51050 100644 --- a/x-pack/plugins/aiops/public/components/explain_log_rate_spikes/explain_log_rate_spikes_page.tsx +++ b/x-pack/plugins/aiops/public/components/explain_log_rate_spikes/explain_log_rate_spikes_page.tsx @@ -38,7 +38,7 @@ import { SearchPanel } from '../search_panel'; import { restorableDefaults } from './explain_log_rate_spikes_app_state'; import { ExplainLogRateSpikesAnalysis } from './explain_log_rate_spikes_analysis'; -import type { GroupTableItem } from '../spike_analysis_table/spike_analysis_table_groups'; +import type { GroupTableItem } from '../spike_analysis_table/types'; import { useSpikeAnalysisTableRowContext } from '../spike_analysis_table/spike_analysis_table_row_provider'; // TODO port to `@emotion/react` once `useEuiBreakpoint` is available https://github.com/elastic/eui/pull/6057 diff --git a/x-pack/plugins/aiops/public/components/spike_analysis_table/get_group_table_items.test.ts b/x-pack/plugins/aiops/public/components/spike_analysis_table/get_group_table_items.test.ts new file mode 100644 index 00000000000000..8807479da12420 --- /dev/null +++ 
b/x-pack/plugins/aiops/public/components/spike_analysis_table/get_group_table_items.test.ts @@ -0,0 +1,53 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import { finalChangePointGroups } from '../../../common/__mocks__/artificial_logs/final_change_point_groups'; + +import { getGroupTableItems } from './get_group_table_items'; + +describe('getGroupTableItems', () => { + it('transforms change point groups into table items', () => { + const groupTableItems = getGroupTableItems(finalChangePointGroups); + + expect(groupTableItems).toEqual([ + { + docCount: 792, + group: [ + { + fieldName: 'response_code', + fieldValue: '500', + }, + { + fieldName: 'url', + fieldValue: 'home.php', + }, + { + fieldName: 'url', + fieldValue: 'login.php', + }, + ], + histogram: undefined, + id: '2038579476', + pValue: 0.010770456205312423, + repeatedValues: [], + }, + { + docCount: 1981, + group: [ + { + fieldName: 'user', + fieldValue: 'Peter', + }, + ], + histogram: undefined, + id: '817080373', + pValue: 2.7454255728359757e-21, + repeatedValues: [], + }, + ]); + }); +}); diff --git a/x-pack/plugins/aiops/public/components/spike_analysis_table/get_group_table_items.ts b/x-pack/plugins/aiops/public/components/spike_analysis_table/get_group_table_items.ts new file mode 100644 index 00000000000000..9135a5449e504e --- /dev/null +++ b/x-pack/plugins/aiops/public/components/spike_analysis_table/get_group_table_items.ts @@ -0,0 +1,40 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +import type { ChangePointGroup, FieldValuePair } from '@kbn/ml-agg-utils'; + +import type { GroupTableItem } from './types'; + +export function getGroupTableItems(changePointsGroups: ChangePointGroup[]): GroupTableItem[] { + const tableItems = changePointsGroups.map(({ id, group, docCount, histogram, pValue }) => { + const sortedGroup = [...group].sort((a, b) => + a.fieldName > b.fieldName ? 1 : b.fieldName > a.fieldName ? -1 : 0 + ); // copy first: Array.prototype.sort() sorts in place and would mutate the caller's array + const dedupedGroup: FieldValuePair[] = []; + const repeatedValues: FieldValuePair[] = []; + + sortedGroup.forEach((pair) => { + const { fieldName, fieldValue } = pair; + if (pair.duplicate === false) { + dedupedGroup.push({ fieldName, fieldValue }); + } else { + repeatedValues.push({ fieldName, fieldValue }); + } + }); + + return { + id, + docCount, + pValue, + group: dedupedGroup, + repeatedValues, + histogram, + }; + }); + + return tableItems; +} diff --git a/x-pack/plugins/aiops/public/components/spike_analysis_table/index.ts b/x-pack/plugins/aiops/public/components/spike_analysis_table/index.ts index 120b15b03364f1..39452f67d1d6ca 100644 --- a/x-pack/plugins/aiops/public/components/spike_analysis_table/index.ts +++ b/x-pack/plugins/aiops/public/components/spike_analysis_table/index.ts @@ -5,5 +5,6 @@ * 2.0.
*/ +export { getGroupTableItems } from './get_group_table_items'; export { SpikeAnalysisTable } from './spike_analysis_table'; export { SpikeAnalysisGroupsTable } from './spike_analysis_table_groups'; diff --git a/x-pack/plugins/aiops/public/components/spike_analysis_table/spike_analysis_table_groups.tsx b/x-pack/plugins/aiops/public/components/spike_analysis_table/spike_analysis_table_groups.tsx index 25518598537714..642f09c66765c3 100644 --- a/x-pack/plugins/aiops/public/components/spike_analysis_table/spike_analysis_table_groups.tsx +++ b/x-pack/plugins/aiops/public/components/spike_analysis_table/spike_analysis_table_groups.tsx @@ -37,6 +37,7 @@ import { MiniHistogram } from '../mini_histogram'; import { getFailedTransactionsCorrelationImpactLabel } from './get_failed_transactions_correlation_impact_label'; import { SpikeAnalysisTable } from './spike_analysis_table'; import { useSpikeAnalysisTableRowContext } from './spike_analysis_table_row_provider'; +import type { GroupTableItem } from './types'; const NARROW_COLUMN_WIDTH = '120px'; const EXPAND_COLUMN_WIDTH = '40px'; @@ -54,15 +55,6 @@ const viewInDiscoverMessage = i18n.translate( } ); -export interface GroupTableItem { - id: string; - docCount: number; - pValue: number | null; - group: FieldValuePair[]; - repeatedValues: FieldValuePair[]; - histogram: ChangePoint['histogram']; -} - interface SpikeAnalysisTableProps { changePoints: ChangePoint[]; groupTableItems: GroupTableItem[]; diff --git a/x-pack/plugins/aiops/public/components/spike_analysis_table/spike_analysis_table_row_provider.tsx b/x-pack/plugins/aiops/public/components/spike_analysis_table/spike_analysis_table_row_provider.tsx index 88b6e508d2f4cd..11f61777430ee2 100644 --- a/x-pack/plugins/aiops/public/components/spike_analysis_table/spike_analysis_table_row_provider.tsx +++ b/x-pack/plugins/aiops/public/components/spike_analysis_table/spike_analysis_table_row_provider.tsx @@ -17,7 +17,7 @@ import React, { import type { ChangePoint } from 
'@kbn/ml-agg-utils'; -import type { GroupTableItem } from './spike_analysis_table_groups'; +import type { GroupTableItem } from './types'; type ChangePointOrNull = ChangePoint | null; type GroupOrNull = GroupTableItem | null; diff --git a/x-pack/plugins/aiops/public/components/spike_analysis_table/types.ts b/x-pack/plugins/aiops/public/components/spike_analysis_table/types.ts new file mode 100644 index 00000000000000..842816f5095a62 --- /dev/null +++ b/x-pack/plugins/aiops/public/components/spike_analysis_table/types.ts @@ -0,0 +1,17 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import type { ChangePoint, FieldValuePair } from '@kbn/ml-agg-utils'; + +export interface GroupTableItem { + id: string; + docCount: number; + pValue: number | null; + group: FieldValuePair[]; + repeatedValues: FieldValuePair[]; + histogram: ChangePoint['histogram']; +} diff --git a/x-pack/plugins/aiops/public/get_document_stats.ts b/x-pack/plugins/aiops/public/get_document_stats.ts index b5960a50a4ba0e..f95e0eb2cc6108 100644 --- a/x-pack/plugins/aiops/public/get_document_stats.ts +++ b/x-pack/plugins/aiops/public/get_document_stats.ts @@ -14,7 +14,7 @@ import type { ChangePoint } from '@kbn/ml-agg-utils'; import type { Query } from '@kbn/es-query'; import { buildBaseFilterCriteria } from './application/utils/query_utils'; -import { GroupTableItem } from './components/spike_analysis_table/spike_analysis_table_groups'; +import { GroupTableItem } from './components/spike_analysis_table/types'; export interface DocumentCountStats { interval?: number; diff --git a/x-pack/plugins/aiops/public/hooks/use_data.ts b/x-pack/plugins/aiops/public/hooks/use_data.ts index 75554d5a9b96ed..73b5f79be3b4f4 100644 --- a/x-pack/plugins/aiops/public/hooks/use_data.ts +++ 
b/x-pack/plugins/aiops/public/hooks/use_data.ts @@ -27,7 +27,7 @@ import { import { useTimefilter } from './use_time_filter'; import { useDocumentCountStats } from './use_document_count_stats'; import type { Dictionary } from './use_url_state'; -import type { GroupTableItem } from '../components/spike_analysis_table/spike_analysis_table_groups'; +import type { GroupTableItem } from '../components/spike_analysis_table/types'; const DEFAULT_BAR_TARGET = 75; diff --git a/x-pack/plugins/aiops/server/routes/explain_log_rate_spikes.ts b/x-pack/plugins/aiops/server/routes/explain_log_rate_spikes.ts index 03021415410ede..dfbc313632efc9 100644 --- a/x-pack/plugins/aiops/server/routes/explain_log_rate_spikes.ts +++ b/x-pack/plugins/aiops/server/routes/explain_log_rate_spikes.ts @@ -6,7 +6,6 @@ */ import { queue } from 'async'; -import { uniqWith, isEqual } from 'lodash'; import type * as estypes from '@elastic/elasticsearch/lib/api/typesWithBodyKey'; @@ -23,7 +22,6 @@ import type { NumericHistogramField, } from '@kbn/ml-agg-utils'; import { fetchHistogramsForFields } from '@kbn/ml-agg-utils'; -import { stringHash } from '@kbn/ml-string-hash'; import { addChangePointsAction, @@ -43,22 +41,13 @@ import { API_ENDPOINT } from '../../common/api'; import { isRequestAbortedError } from '../lib/is_request_aborted_error'; import type { AiopsLicense } from '../types'; +import { duplicateIdentifier } from './queries/duplicate_identifier'; import { fetchChangePointPValues } from './queries/fetch_change_point_p_values'; import { fetchIndexInfo } from './queries/fetch_index_info'; -import { - dropDuplicates, - fetchFrequentItems, - groupDuplicates, -} from './queries/fetch_frequent_items'; -import type { ItemsetResult } from './queries/fetch_frequent_items'; +import { dropDuplicates, fetchFrequentItems } from './queries/fetch_frequent_items'; import { getHistogramQuery } from './queries/get_histogram_query'; -import { - getFieldValuePairCounts, - getSimpleHierarchicalTree, - 
getSimpleHierarchicalTreeLeaves, - markDuplicates, -} from './queries/get_simple_hierarchical_tree'; import { getGroupFilter } from './queries/get_group_filter'; +import { getChangePointGroups } from './queries/get_change_point_groups'; // 10s ping frequency to keep the stream alive. const PING_FREQUENCY = 10000; @@ -434,25 +423,9 @@ export const defineExplainLogRateSpikesRoute = ( }) ); - // To optimize the `frequent_items` query, we identify duplicate change points by count attributes. - // Note this is a compromise and not 100% accurate because there could be change points that - // have the exact same counts but still don't co-occur. - const duplicateIdentifier: Array = [ - 'doc_count', - 'bg_count', - 'total_doc_count', - 'total_bg_count', - ]; - - // These are the deduplicated change points we pass to the `frequent_items` aggregation. + // Deduplicated change points we pass to the `frequent_items` aggregation. const deduplicatedChangePoints = dropDuplicates(changePoints, duplicateIdentifier); - // We use the grouped change points to later repopulate - // the `frequent_items` result with the missing duplicates. - const groupedChangePoints = groupDuplicates(changePoints, duplicateIdentifier).filter( - (g) => g.group.length > 1 - ); - try { const { fields, df } = await fetchFrequentItems( client, @@ -475,143 +448,9 @@ export const defineExplainLogRateSpikesRoute = ( } if (fields.length > 0 && df.length > 0) { - // The way the `frequent_items` aggregations works could return item sets that include - // field/value pairs that are not part of the original list of significant change points. - // This cleans up groups and removes those unrelated field/value pairs. 
- const filteredDf = df - .map((fi, fiIndex) => { - const updatedSet = Object.entries(fi.set).reduce( - (set, [field, value]) => { - if ( - changePoints.some( - (cp) => cp.fieldName === field && cp.fieldValue === value - ) - ) { - set[field] = value; - } - return set; - }, - {} - ); - - // only assign the updated reduced set if it doesn't already match - // an existing set. if there's a match just add an empty set - // so it will be filtered in the last step. - fi.set = df.some((d, dIndex) => fiIndex !== dIndex && isEqual(fi.set, d.set)) - ? {} - : updatedSet; - - fi.size = Object.keys(fi.set).length; - - return fi; - }) - .filter((fi) => fi.size > 1); - - // `frequent_items` returns lot of different small groups of field/value pairs that co-occur. - // The following steps analyse these small groups, identify overlap between these groups, - // and then summarize them in larger groups where possible. - - // Get a tree structure based on `frequent_items`. - const { root } = getSimpleHierarchicalTree(filteredDf, true, false, fields); - - // Each leave of the tree will be a summarized group of co-occuring field/value pairs. - const treeLeaves = getSimpleHierarchicalTreeLeaves(root, []); - - // To be able to display a more cleaned up results table in the UI, we identify field/value pairs - // that occur in multiple groups. This will allow us to highlight field/value pairs that are - // unique to a group in a better way. This step will also re-add duplicates we identified in the - // beginning and didn't pass on to the `frequent_items` agg. 
- const fieldValuePairCounts = getFieldValuePairCounts(treeLeaves); - const changePointGroups = markDuplicates(treeLeaves, fieldValuePairCounts).map( - (g) => { - const group = [...g.group]; - - for (const groupItem of g.group) { - const { duplicate } = groupItem; - const duplicates = groupedChangePoints.find((d) => - d.group.some( - (dg) => - dg.fieldName === groupItem.fieldName && - dg.fieldValue === groupItem.fieldValue - ) - ); - - if (duplicates !== undefined) { - group.push( - ...duplicates.group.map((d) => { - return { - fieldName: d.fieldName, - fieldValue: d.fieldValue, - duplicate, - }; - }) - ); - } - } - - return { - ...g, - group: uniqWith(group, (a, b) => isEqual(a, b)), - }; - } - ); - - // Some field/value pairs might not be part of the `frequent_items` result set, for example - // because they don't co-occur with other field/value pairs or because of the limits we set on the query. - // In this next part we identify those missing pairs and add them as individual groups. 
- const missingChangePoints = deduplicatedChangePoints.filter((cp) => { - return !changePointGroups.some((cpg) => { - return cpg.group.some( - (d) => d.fieldName === cp.fieldName && d.fieldValue === cp.fieldValue - ); - }); - }); - - changePointGroups.push( - ...missingChangePoints.map( - ({ fieldName, fieldValue, doc_count: docCount, pValue }) => { - const duplicates = groupedChangePoints.find((d) => - d.group.some( - (dg) => dg.fieldName === fieldName && dg.fieldValue === fieldValue - ) - ); - if (duplicates !== undefined) { - return { - id: `${stringHash( - JSON.stringify( - duplicates.group.map((d) => ({ - fieldName: d.fieldName, - fieldValue: d.fieldValue, - })) - ) - )}`, - group: duplicates.group.map((d) => ({ - fieldName: d.fieldName, - fieldValue: d.fieldValue, - duplicate: false, - })), - docCount, - pValue, - }; - } else { - return { - id: `${stringHash(JSON.stringify({ fieldName, fieldValue }))}`, - group: [ - { - fieldName, - fieldValue, - duplicate: false, - }, - ], - docCount, - pValue, - }; - } - } - ) - ); + const changePointGroups = getChangePointGroups(df, changePoints, fields); - // Finally, we'll find out if there's at least one group with at least two items, + // We'll find out if there's at least one group with at least two items, // only then will we return the groups to the clients and make the grouping option available. const maxItems = Math.max(...changePointGroups.map((g) => g.group.length)); diff --git a/x-pack/plugins/aiops/server/routes/queries/duplicate_identifier.ts b/x-pack/plugins/aiops/server/routes/queries/duplicate_identifier.ts new file mode 100644 index 00000000000000..d996c060f7fe96 --- /dev/null +++ b/x-pack/plugins/aiops/server/routes/queries/duplicate_identifier.ts @@ -0,0 +1,18 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. 
Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import type { ChangePoint } from '@kbn/ml-agg-utils'; + +// To optimize the `frequent_items` query, we identify duplicate change points by count attributes. +// Note this is a compromise and not 100% accurate because there could be change points that +// have the exact same counts but still don't co-occur. +export const duplicateIdentifier: Array<keyof ChangePoint> = [ + 'doc_count', + 'bg_count', + 'total_doc_count', + 'total_bg_count', +]; diff --git a/x-pack/plugins/aiops/server/routes/queries/fetch_frequent_items.ts b/x-pack/plugins/aiops/server/routes/queries/fetch_frequent_items.ts index ff1fba16f28f24..76aeeb1eabcdbd 100644 --- a/x-pack/plugins/aiops/server/routes/queries/fetch_frequent_items.ts +++ b/x-pack/plugins/aiops/server/routes/queries/fetch_frequent_items.ts @@ -11,9 +11,11 @@ import type * as estypes from '@elastic/elasticsearch/lib/api/typesWithBodyKey'; import type { ElasticsearchClient } from '@kbn/core-elasticsearch-server'; import type { Logger } from '@kbn/logging'; -import { type ChangePoint, type FieldValuePair, RANDOM_SAMPLER_SEED } from '@kbn/ml-agg-utils'; +import { type ChangePoint, RANDOM_SAMPLER_SEED } from '@kbn/ml-agg-utils'; import { isPopulatedObject } from '@kbn/ml-is-populated-object'; +import type { ChangePointDuplicateGroup, ItemsetResult } from '../../../common/types'; + const FREQUENT_ITEMS_FIELDS_LIMIT = 15; interface FrequentItemsAggregation extends estypes.AggregationsSamplerAggregation { @@ -34,10 +36,6 @@ export function dropDuplicates(cps: ChangePoint[], uniqueFields: Array isEqual(pick(a, uniqueFields), pick(b, uniqueFields))); } -interface ChangePointDuplicateGroup { - keys: Pick; - group: ChangePoint[]; -} export function groupDuplicates(cps: ChangePoint[], uniqueFields: Array) { const groups: ChangePointDuplicateGroup[] = []; @@ -226,12 +224,3 @@ export async function fetchFrequentItems( totalDocCount:
totalDocCountFi, }; } - -export interface ItemsetResult { - set: Record; - size: number; - maxPValue: number; - doc_count: number; - support: number; - total_doc_count: number; -} diff --git a/x-pack/plugins/aiops/server/routes/queries/get_change_point_groups.test.ts b/x-pack/plugins/aiops/server/routes/queries/get_change_point_groups.test.ts new file mode 100644 index 00000000000000..2496e9e927f0ed --- /dev/null +++ b/x-pack/plugins/aiops/server/routes/queries/get_change_point_groups.test.ts @@ -0,0 +1,21 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import { fields } from '../../../common/__mocks__/artificial_logs/fields'; +import { frequentItems } from '../../../common/__mocks__/artificial_logs/frequent_items'; +import { changePoints } from '../../../common/__mocks__/artificial_logs/change_points'; +import { finalChangePointGroups } from '../../../common/__mocks__/artificial_logs/final_change_point_groups'; + +import { getChangePointGroups } from './get_change_point_groups'; + +describe('getChangePointGroups', () => { + it('gets change point groups', () => { + const changePointGroups = getChangePointGroups(frequentItems, changePoints, fields); + + expect(changePointGroups).toEqual(finalChangePointGroups); + }); +}); diff --git a/x-pack/plugins/aiops/server/routes/queries/get_change_point_groups.ts b/x-pack/plugins/aiops/server/routes/queries/get_change_point_groups.ts new file mode 100644 index 00000000000000..fa0722b15c9a4f --- /dev/null +++ b/x-pack/plugins/aiops/server/routes/queries/get_change_point_groups.ts @@ -0,0 +1,74 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. 
Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import type { ChangePoint, ChangePointGroup } from '@kbn/ml-agg-utils'; + +import { duplicateIdentifier } from './duplicate_identifier'; +import { dropDuplicates, groupDuplicates } from './fetch_frequent_items'; +import { getFieldValuePairCounts } from './get_field_value_pair_counts'; +import { getMarkedDuplicates } from './get_marked_duplicates'; +import { getSimpleHierarchicalTree } from './get_simple_hierarchical_tree'; +import { getSimpleHierarchicalTreeLeaves } from './get_simple_hierarchical_tree_leaves'; +import { getFilteredFrequentItems } from './get_filtered_frequent_items'; +import { getGroupsWithReaddedDuplicates } from './get_groups_with_readded_duplicates'; +import { getMissingChangePoints } from './get_missing_change_points'; +import { transformChangePointToGroup } from './transform_change_point_to_group'; +import type { ItemsetResult } from '../../../common/types'; + +export function getChangePointGroups( + itemsets: ItemsetResult[], + changePoints: ChangePoint[], + fields: string[] +): ChangePointGroup[] { + // These are the deduplicated change points we pass to the `frequent_items` aggregation. + const deduplicatedChangePoints = dropDuplicates(changePoints, duplicateIdentifier); + + // We use the grouped change points to later repopulate + // the `frequent_items` result with the missing duplicates. + const groupedChangePoints = groupDuplicates(changePoints, duplicateIdentifier).filter( + (g) => g.group.length > 1 + ); + + const filteredDf = getFilteredFrequentItems(itemsets, changePoints); + + // `frequent_items` returns a lot of different small groups of field/value pairs that co-occur. + // The following steps analyse these small groups, identify overlap between these groups, + // and then summarize them in larger groups where possible. + + // Get a tree structure based on `frequent_items`.
+ const { root } = getSimpleHierarchicalTree(filteredDf, true, false, fields); + + // Each leaf of the tree will be a summarized group of co-occurring field/value pairs. + const treeLeaves = getSimpleHierarchicalTreeLeaves(root, []); + + // To be able to display a more cleaned up results table in the UI, we identify field/value pairs + // that occur in multiple groups. This will allow us to highlight field/value pairs that are + // unique to a group in a better way. This step will also re-add duplicates we identified in the + // beginning and didn't pass on to the `frequent_items` agg. + const fieldValuePairCounts = getFieldValuePairCounts(treeLeaves); + const changePointGroupsWithMarkedDuplicates = getMarkedDuplicates( + treeLeaves, + fieldValuePairCounts + ); + const changePointGroups = getGroupsWithReaddedDuplicates( + changePointGroupsWithMarkedDuplicates, + groupedChangePoints + ); + + // Some field/value pairs might not be part of the `frequent_items` result set, for example + // because they don't co-occur with other field/value pairs or because of the limits we set on the query. + // In this next part we identify those missing pairs and add them as individual groups. + const missingChangePoints = getMissingChangePoints(deduplicatedChangePoints, changePointGroups); + + changePointGroups.push( + ...missingChangePoints.map((changePoint) => + transformChangePointToGroup(changePoint, groupedChangePoints) + ) + ); + + return changePointGroups; +} diff --git a/x-pack/plugins/aiops/server/routes/queries/get_field_value_pair_counts.test.ts b/x-pack/plugins/aiops/server/routes/queries/get_field_value_pair_counts.test.ts new file mode 100644 index 00000000000000..d5b56751fa1732 --- /dev/null +++ b/x-pack/plugins/aiops/server/routes/queries/get_field_value_pair_counts.test.ts @@ -0,0 +1,50 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements.
Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import { changePointGroups } from '../../../common/__mocks__/farequote/change_point_groups'; +import { fields } from '../../../common/__mocks__/artificial_logs/fields'; +import { filteredFrequentItems } from '../../../common/__mocks__/artificial_logs/filtered_frequent_items'; + +import { getFieldValuePairCounts } from './get_field_value_pair_counts'; +import { getSimpleHierarchicalTree } from './get_simple_hierarchical_tree'; +import { getSimpleHierarchicalTreeLeaves } from './get_simple_hierarchical_tree_leaves'; + +describe('getFieldValuePairCounts', () => { + it('returns a nested record with field/value pair counts for farequote', () => { + const fieldValuePairCounts = getFieldValuePairCounts(changePointGroups); + + expect(fieldValuePairCounts).toEqual({ + airline: { + AAL: 1, + UAL: 1, + }, + 'custom_field.keyword': { + deviation: 2, + }, + }); + }); + + it('returns a nested record with field/value pair counts for artificial logs', () => { + const simpleHierarchicalTree = getSimpleHierarchicalTree( + filteredFrequentItems, + true, + false, + fields + ); + const leaves = getSimpleHierarchicalTreeLeaves(simpleHierarchicalTree.root, []); + const fieldValuePairCounts = getFieldValuePairCounts(leaves); + + expect(fieldValuePairCounts).toEqual({ + response_code: { + '500': 1, + }, + url: { + 'home.php': 1, + }, + }); + }); +}); diff --git a/x-pack/plugins/aiops/server/routes/queries/get_field_value_pair_counts.ts b/x-pack/plugins/aiops/server/routes/queries/get_field_value_pair_counts.ts new file mode 100644 index 00000000000000..1c5f5ca33d7184 --- /dev/null +++ b/x-pack/plugins/aiops/server/routes/queries/get_field_value_pair_counts.ts @@ -0,0 +1,25 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. 
Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import type { ChangePointGroup } from '@kbn/ml-agg-utils'; + +import type { FieldValuePairCounts } from '../../../common/types'; + +/** + * Get a nested record of field/value pairs with counts + */ +export function getFieldValuePairCounts(cpgs: ChangePointGroup[]): FieldValuePairCounts { + return cpgs.reduce((p, { group }) => { + group.forEach(({ fieldName, fieldValue }) => { + if (p[fieldName] === undefined) { + p[fieldName] = {}; + } + p[fieldName][fieldValue] = p[fieldName][fieldValue] ? p[fieldName][fieldValue] + 1 : 1; + }); + return p; + }, {}); +} diff --git a/x-pack/plugins/aiops/server/routes/queries/get_filtered_frequent_items.test.ts b/x-pack/plugins/aiops/server/routes/queries/get_filtered_frequent_items.test.ts new file mode 100644 index 00000000000000..8399c0366dea10 --- /dev/null +++ b/x-pack/plugins/aiops/server/routes/queries/get_filtered_frequent_items.test.ts @@ -0,0 +1,20 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +import { changePoints } from '../../../common/__mocks__/artificial_logs/change_points'; +import { frequentItems } from '../../../common/__mocks__/artificial_logs/frequent_items'; +import { filteredFrequentItems } from '../../../common/__mocks__/artificial_logs/filtered_frequent_items'; + +import { getFilteredFrequentItems } from './get_filtered_frequent_items'; + +describe('getFilteredFrequentItems', () => { + it('filter frequent item set based on provided change points', () => { + expect(getFilteredFrequentItems(frequentItems, changePoints)).toStrictEqual( + filteredFrequentItems + ); + }); +}); diff --git a/x-pack/plugins/aiops/server/routes/queries/get_filtered_frequent_items.ts b/x-pack/plugins/aiops/server/routes/queries/get_filtered_frequent_items.ts new file mode 100644 index 00000000000000..e071621f6d9f7e --- /dev/null +++ b/x-pack/plugins/aiops/server/routes/queries/get_filtered_frequent_items.ts @@ -0,0 +1,47 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import { isEqual } from 'lodash'; + +import type { ChangePoint } from '@kbn/ml-agg-utils'; + +import type { ItemsetResult } from '../../../common/types'; + +// The way the `frequent_items` aggregation works could return item sets that include +// field/value pairs that are not part of the original list of significant change points. +// This cleans up groups and removes those unrelated field/value pairs. 
+export function getFilteredFrequentItems( + itemsets: ItemsetResult[], + changePoints: ChangePoint[] +): ItemsetResult[] { + return itemsets.reduce((p, itemset, itemsetIndex) => { + // Remove field/value pairs not part of the provided change points (reassigns `set` on the input) + itemset.set = Object.entries(itemset.set).reduce( + (set, [field, value]) => { + if (changePoints.some((cp) => cp.fieldName === field && cp.fieldValue === value)) { + set[field] = value; + } + return set; + }, + {} + ); + + // Only keep this itemset if its reduced set doesn't already match + // the set of another itemset. If there's a match, skip it by returning + // the accumulator unchanged so it won't be part of the result. + if (itemsets.some((d, dIndex) => itemsetIndex !== dIndex && isEqual(itemset.set, d.set))) { + return p; + } + + // Update the size attribute to match the possibly updated set + itemset.size = Object.keys(itemset.set).length; + + p.push(itemset); + + return p; + }, []); +} diff --git a/x-pack/plugins/aiops/server/routes/queries/get_group_filter.test.ts b/x-pack/plugins/aiops/server/routes/queries/get_group_filter.test.ts index 432450ede9b455..b2c15d70e83f93 100644 --- a/x-pack/plugins/aiops/server/routes/queries/get_group_filter.test.ts +++ b/x-pack/plugins/aiops/server/routes/queries/get_group_filter.test.ts @@ -5,30 +5,13 @@ * 2.0.
*/ -import { getGroupFilter } from './get_group_filter'; +import { finalChangePointGroups } from '../../../common/__mocks__/artificial_logs/final_change_point_groups'; -const changePointGroups = [ - { - id: '2038579476', - group: [ - { fieldName: 'response_code', fieldValue: '500', duplicate: false }, - { fieldName: 'url', fieldValue: 'home.php', duplicate: false }, - { fieldName: 'url', fieldValue: 'login.php', duplicate: false }, - ], - docCount: 792, - pValue: 0.010770456205312423, - }, - { - id: '817080373', - group: [{ fieldName: 'user', fieldValue: 'Peter', duplicate: false }], - docCount: 1981, - pValue: 2.7454255728359757e-21, - }, -]; +import { getGroupFilter } from './get_group_filter'; describe('getGroupFilter', () => { it('gets a query filter for the change points of a group with multiple values per field', () => { - expect(getGroupFilter(changePointGroups[0])).toStrictEqual([ + expect(getGroupFilter(finalChangePointGroups[0])).toStrictEqual([ { term: { response_code: '500', @@ -43,7 +26,7 @@ describe('getGroupFilter', () => { }); it('gets a query filter for the change points of a group with just a single field/value', () => { - expect(getGroupFilter(changePointGroups[1])).toStrictEqual([ + expect(getGroupFilter(finalChangePointGroups[1])).toStrictEqual([ { term: { user: 'Peter', diff --git a/x-pack/plugins/aiops/server/routes/queries/get_groups_with_readded_duplicates.test.ts b/x-pack/plugins/aiops/server/routes/queries/get_groups_with_readded_duplicates.test.ts new file mode 100644 index 00000000000000..50b5719e49fa4c --- /dev/null +++ b/x-pack/plugins/aiops/server/routes/queries/get_groups_with_readded_duplicates.test.ts @@ -0,0 +1,55 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +import { changePointGroups } from '../../../common/__mocks__/artificial_logs/change_point_groups'; +import { changePoints } from '../../../common/__mocks__/artificial_logs/change_points'; + +import { duplicateIdentifier } from './duplicate_identifier'; +import { getGroupsWithReaddedDuplicates } from './get_groups_with_readded_duplicates'; +import { groupDuplicates } from './fetch_frequent_items'; +import { getFieldValuePairCounts } from './get_field_value_pair_counts'; +import { getMarkedDuplicates } from './get_marked_duplicates'; + +describe('getGroupsWithReaddedDuplicates', () => { + it('gets groups with readded duplicates', () => { + const groupedChangePoints = groupDuplicates(changePoints, duplicateIdentifier).filter( + (g) => g.group.length > 1 + ); + + const fieldValuePairCounts = getFieldValuePairCounts(changePointGroups); + const markedDuplicates = getMarkedDuplicates(changePointGroups, fieldValuePairCounts); + const groupsWithReaddedDuplicates = getGroupsWithReaddedDuplicates( + markedDuplicates, + groupedChangePoints + ); + + expect(groupsWithReaddedDuplicates).toEqual([ + { + docCount: 792, + group: [ + { + duplicate: false, + fieldName: 'response_code', + fieldValue: '500', + }, + { + duplicate: false, + fieldName: 'url', + fieldValue: 'home.php', + }, + { + duplicate: false, + fieldName: 'url', + fieldValue: 'login.php', + }, + ], + id: '2038579476', + pValue: 0.010770456205312423, + }, + ]); + }); +}); diff --git a/x-pack/plugins/aiops/server/routes/queries/get_groups_with_readded_duplicates.ts b/x-pack/plugins/aiops/server/routes/queries/get_groups_with_readded_duplicates.ts new file mode 100644 index 00000000000000..91622c2b15419e --- /dev/null +++ b/x-pack/plugins/aiops/server/routes/queries/get_groups_with_readded_duplicates.ts @@ -0,0 +1,47 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. 
Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import { uniqWith, isEqual } from 'lodash'; + +import type { ChangePointGroup } from '@kbn/ml-agg-utils'; + +import type { ChangePointDuplicateGroup } from '../../../common/types'; + +export function getGroupsWithReaddedDuplicates( + groups: ChangePointGroup[], + groupedChangePoints: ChangePointDuplicateGroup[] +): ChangePointGroup[] { + return groups.map((g) => { + const group = [...g.group]; + + for (const groupItem of g.group) { + const { duplicate } = groupItem; + const duplicates = groupedChangePoints.find((d) => + d.group.some( + (dg) => dg.fieldName === groupItem.fieldName && dg.fieldValue === groupItem.fieldValue + ) + ); + + if (duplicates !== undefined) { + group.push( + ...duplicates.group.map((d) => { + return { + fieldName: d.fieldName, + fieldValue: d.fieldValue, + duplicate, + }; + }) + ); + } + } + + return { + ...g, + group: uniqWith(group, (a, b) => isEqual(a, b)), + }; + }); +} diff --git a/x-pack/plugins/aiops/server/routes/queries/get_marked_duplicates.test.ts b/x-pack/plugins/aiops/server/routes/queries/get_marked_duplicates.test.ts new file mode 100644 index 00000000000000..e44a26d70494cf --- /dev/null +++ b/x-pack/plugins/aiops/server/routes/queries/get_marked_duplicates.test.ts @@ -0,0 +1,91 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +import { changePointGroups } from '../../../common/__mocks__/farequote/change_point_groups'; +import { fields } from '../../../common/__mocks__/artificial_logs/fields'; +import { filteredFrequentItems } from '../../../common/__mocks__/artificial_logs/filtered_frequent_items'; + +import { getFieldValuePairCounts } from './get_field_value_pair_counts'; +import { getMarkedDuplicates } from './get_marked_duplicates'; +import { getSimpleHierarchicalTree } from './get_simple_hierarchical_tree'; +import { getSimpleHierarchicalTreeLeaves } from './get_simple_hierarchical_tree_leaves'; + +describe('markDuplicates', () => { + it('marks duplicates based on change point groups for farequote', () => { + const fieldValuePairCounts = getFieldValuePairCounts(changePointGroups); + const markedDuplicates = getMarkedDuplicates(changePointGroups, fieldValuePairCounts); + + expect(markedDuplicates).toEqual([ + { + id: 'group-1', + group: [ + { + fieldName: 'custom_field.keyword', + fieldValue: 'deviation', + duplicate: true, + }, + { + fieldName: 'airline', + fieldValue: 'UAL', + duplicate: false, + }, + ], + docCount: 101, + pValue: 0.01, + }, + { + id: 'group-2', + group: [ + { + fieldName: 'custom_field.keyword', + fieldValue: 'deviation', + duplicate: true, + }, + { + fieldName: 'airline', + fieldValue: 'AAL', + duplicate: false, + }, + ], + docCount: 49, + pValue: 0.001, + }, + ]); + }); + + it('marks duplicates based on change point groups for artificial logs', () => { + const simpleHierarchicalTree = getSimpleHierarchicalTree( + filteredFrequentItems, + true, + false, + fields + ); + const leaves = getSimpleHierarchicalTreeLeaves(simpleHierarchicalTree.root, []); + const fieldValuePairCounts = getFieldValuePairCounts(leaves); + const markedDuplicates = getMarkedDuplicates(leaves, fieldValuePairCounts); + + expect(markedDuplicates).toEqual([ + { + docCount: 792, + group: [ + { + duplicate: false, + fieldName: 'response_code', + fieldValue: '500', + }, + { + duplicate: 
false, + fieldName: 'url', + fieldValue: 'home.php', + }, + ], + id: '2038579476', + pValue: 0.010770456205312423, + }, + ]); + }); +}); diff --git a/x-pack/plugins/aiops/server/routes/queries/get_marked_duplicates.ts b/x-pack/plugins/aiops/server/routes/queries/get_marked_duplicates.ts new file mode 100644 index 00000000000000..a4ed85e8e94b69 --- /dev/null +++ b/x-pack/plugins/aiops/server/routes/queries/get_marked_duplicates.ts @@ -0,0 +1,30 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import type { ChangePointGroup } from '@kbn/ml-agg-utils'; + +import type { FieldValuePairCounts } from '../../../common/types'; + +/** + * Analyse duplicate field/value pairs in change point groups. + */ +export function getMarkedDuplicates( + cpgs: ChangePointGroup[], + fieldValuePairCounts: FieldValuePairCounts +): ChangePointGroup[] { + return cpgs.map((cpg) => { + return { + ...cpg, + group: cpg.group.map((g) => { + return { + ...g, + duplicate: fieldValuePairCounts[g.fieldName][g.fieldValue] > 1, + }; + }), + }; + }); +} diff --git a/x-pack/plugins/aiops/server/routes/queries/get_missing_change_points.test.ts b/x-pack/plugins/aiops/server/routes/queries/get_missing_change_points.test.ts new file mode 100644 index 00000000000000..477321b74e0075 --- /dev/null +++ b/x-pack/plugins/aiops/server/routes/queries/get_missing_change_points.test.ts @@ -0,0 +1,52 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +import { changePointGroups } from '../../../common/__mocks__/artificial_logs/change_point_groups'; +import { changePoints } from '../../../common/__mocks__/artificial_logs/change_points'; + +import { duplicateIdentifier } from './duplicate_identifier'; +import { getGroupsWithReaddedDuplicates } from './get_groups_with_readded_duplicates'; +import { dropDuplicates, groupDuplicates } from './fetch_frequent_items'; +import { getFieldValuePairCounts } from './get_field_value_pair_counts'; +import { getMarkedDuplicates } from './get_marked_duplicates'; +import { getMissingChangePoints } from './get_missing_change_points'; + +describe('getMissingChangePoints', () => { + it('get missing change points', () => { + const deduplicatedChangePoints = dropDuplicates(changePoints, duplicateIdentifier); + + const groupedChangePoints = groupDuplicates(changePoints, duplicateIdentifier).filter( + (g) => g.group.length > 1 + ); + + const fieldValuePairCounts = getFieldValuePairCounts(changePointGroups); + const markedDuplicates = getMarkedDuplicates(changePointGroups, fieldValuePairCounts); + const groupsWithReaddedDuplicates = getGroupsWithReaddedDuplicates( + markedDuplicates, + groupedChangePoints + ); + + const missingChangePoints = getMissingChangePoints( + deduplicatedChangePoints, + groupsWithReaddedDuplicates + ); + + expect(missingChangePoints).toEqual([ + { + bg_count: 553, + doc_count: 1981, + fieldName: 'user', + fieldValue: 'Peter', + normalizedScore: 0.8327337555873047, + pValue: 2.7454255728359757e-21, + score: 47.34435085428873, + total_bg_count: 1975, + total_doc_count: 4671, + }, + ]); + }); +}); diff --git a/x-pack/plugins/aiops/server/routes/queries/get_missing_change_points.ts b/x-pack/plugins/aiops/server/routes/queries/get_missing_change_points.ts new file mode 100644 index 00000000000000..57422ad16213f0 --- /dev/null +++ b/x-pack/plugins/aiops/server/routes/queries/get_missing_change_points.ts @@ -0,0 +1,19 @@ +/* + * Copyright Elasticsearch B.V. 
and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import type { ChangePoint, ChangePointGroup } from '@kbn/ml-agg-utils'; + +export function getMissingChangePoints( + deduplicatedChangePoints: ChangePoint[], + changePointGroups: ChangePointGroup[] +) { + return deduplicatedChangePoints.filter((cp) => { + return !changePointGroups.some((cpg) => { + return cpg.group.some((d) => d.fieldName === cp.fieldName && d.fieldValue === cp.fieldValue); + }); + }); +} diff --git a/x-pack/plugins/aiops/server/routes/queries/get_simple_hierarchical_tree.test.ts b/x-pack/plugins/aiops/server/routes/queries/get_simple_hierarchical_tree.test.ts index 5f2125a583db72..36cc113ad7be07 100644 --- a/x-pack/plugins/aiops/server/routes/queries/get_simple_hierarchical_tree.test.ts +++ b/x-pack/plugins/aiops/server/routes/queries/get_simple_hierarchical_tree.test.ts @@ -5,101 +5,50 @@ * 2.0. */ -import type { ChangePointGroup } from '@kbn/ml-agg-utils'; +import { fields } from '../../../common/__mocks__/artificial_logs/fields'; +import { filteredFrequentItems } from '../../../common/__mocks__/artificial_logs/filtered_frequent_items'; -import { getFieldValuePairCounts, markDuplicates } from './get_simple_hierarchical_tree'; +import { getSimpleHierarchicalTree } from './get_simple_hierarchical_tree'; -const changePointGroups: ChangePointGroup[] = [ - { - id: 'group-1', - group: [ - { - fieldName: 'custom_field.keyword', - fieldValue: 'deviation', +describe('getSimpleHierarchicalTree', () => { + it('returns the hierarchical tree', () => { + // stringify and again parse the tree to remove attached methods + // and make it comparable against a static representation. 
+ expect( + JSON.parse( + JSON.stringify(getSimpleHierarchicalTree(filteredFrequentItems, true, false, fields)) + ) + ).toEqual({ + root: { + name: '', + set: [], + docCount: 0, + pValue: 0, + children: [ + { + name: "792/1505 500 home.php '*'", + set: [ + { fieldName: 'response_code', fieldValue: '500' }, + { fieldName: 'url', fieldValue: 'home.php' }, + ], + docCount: 792, + pValue: 0.010770456205312423, + children: [ + { + name: "792/1505 500 home.php '*'", + set: [ + { fieldName: 'response_code', fieldValue: '500' }, + { fieldName: 'url', fieldValue: 'home.php' }, + ], + docCount: 792, + pValue: 0.010770456205312423, + children: [], + }, + ], + }, + ], }, - { - fieldName: 'airline', - fieldValue: 'UAL', - }, - ], - docCount: 101, - pValue: 0.01, - }, - { - id: 'group-2', - group: [ - { - fieldName: 'custom_field.keyword', - fieldValue: 'deviation', - }, - { - fieldName: 'airline', - fieldValue: 'AAL', - }, - ], - docCount: 49, - pValue: 0.001, - }, -]; - -describe('get_simple_hierarchical_tree', () => { - describe('getFieldValuePairCounts', () => { - it('returns a nested record with field/value pair counts', () => { - const fieldValuePairCounts = getFieldValuePairCounts(changePointGroups); - - expect(fieldValuePairCounts).toEqual({ - airline: { - AAL: 1, - UAL: 1, - }, - 'custom_field.keyword': { - deviation: 2, - }, - }); - }); - }); - - describe('markDuplicates', () => { - it('marks duplicates based on change point groups', () => { - const fieldValuePairCounts = getFieldValuePairCounts(changePointGroups); - const markedDuplicates = markDuplicates(changePointGroups, fieldValuePairCounts); - - expect(markedDuplicates).toEqual([ - { - id: 'group-1', - group: [ - { - fieldName: 'custom_field.keyword', - fieldValue: 'deviation', - duplicate: true, - }, - { - fieldName: 'airline', - fieldValue: 'UAL', - duplicate: false, - }, - ], - docCount: 101, - pValue: 0.01, - }, - { - id: 'group-2', - group: [ - { - fieldName: 'custom_field.keyword', - fieldValue: 
'deviation', - duplicate: true, - }, - { - fieldName: 'airline', - fieldValue: 'AAL', - duplicate: false, - }, - ], - docCount: 49, - pValue: 0.001, - }, - ]); + fields: ['response_code', 'url', 'user'], }); }); }); diff --git a/x-pack/plugins/aiops/server/routes/queries/get_simple_hierarchical_tree.ts b/x-pack/plugins/aiops/server/routes/queries/get_simple_hierarchical_tree.ts index 9f39d1eb11f681..41c014e27af440 100644 --- a/x-pack/plugins/aiops/server/routes/queries/get_simple_hierarchical_tree.ts +++ b/x-pack/plugins/aiops/server/routes/queries/get_simple_hierarchical_tree.ts @@ -5,47 +5,15 @@ * 2.0. */ -// import { omit, uniq } from 'lodash'; +import type { ItemsetResult, SimpleHierarchicalTreeNode } from '../../../common/types'; -import type { ChangePointGroup, FieldValuePair } from '@kbn/ml-agg-utils'; -import { stringHash } from '@kbn/ml-string-hash'; +import { getValueCounts } from './get_value_counts'; +import { getValuesDescending } from './get_values_descending'; -import type { ItemsetResult } from './fetch_frequent_items'; +function NewNodeFactory(name: string): SimpleHierarchicalTreeNode { + const children: SimpleHierarchicalTreeNode[] = []; -function getValueCounts(df: ItemsetResult[], field: string) { - return df.reduce>((p, c) => { - if (c.set[field] === undefined) { - return p; - } - p[c.set[field]] = p[c.set[field]] ? 
p[c.set[field]] + 1 : 1; - return p; - }, {}); -} - -function getValuesDescending(df: ItemsetResult[], field: string): string[] { - const valueCounts = getValueCounts(df, field); - const keys = Object.keys(valueCounts); - - return keys.sort((a, b) => { - return valueCounts[b] - valueCounts[a]; - }); -} - -interface NewNode { - name: string; - set: FieldValuePair[]; - docCount: number; - pValue: number | null; - children: NewNode[]; - icon: string; - iconStyle: string; - addNode: (node: NewNode) => void; -} - -function NewNodeFactory(name: string): NewNode { - const children: NewNode[] = []; - - const addNode = (node: NewNode) => { + const addNode = (node: SimpleHierarchicalTreeNode) => { children.push(node); }; @@ -55,19 +23,15 @@ function NewNodeFactory(name: string): NewNode { docCount: 0, pValue: 0, children, - icon: 'default', - iconStyle: 'default', addNode, }; } /** - * Simple (poorly implemented) function that constructs a tree from an itemset DataFrame sorted by support (count) + * Simple function that constructs a tree from an itemset DataFrame sorted by support (count) * The resulting tree components are non-overlapping subsets of the data. * In summary, we start with the most inclusive itemset (highest count), and perform a depth first search in field order. 
* - * TODO - the code style here is hacky and should be re-written - * * @param displayParent * @param parentDocCount * @param parentLabel @@ -80,7 +44,7 @@ function NewNodeFactory(name: string): NewNode { */ function dfDepthFirstSearch( fields: string[], - displayParent: NewNode, + displayParent: SimpleHierarchicalTreeNode, parentDocCount: number, parentLabel: string, field: string, @@ -108,7 +72,7 @@ function dfDepthFirstSearch( let label = `${parentLabel} ${value}`; - let displayNode: NewNode; + let displayNode: SimpleHierarchicalTreeNode; if (parentDocCount === docCount && collapseRedundant) { // collapse identical paths displayParent.name += ` ${value}`; @@ -118,7 +82,6 @@ function dfDepthFirstSearch( displayNode = displayParent; } else { displayNode = NewNodeFactory(`${docCount}/${totalDocCount}${label}`); - displayNode.iconStyle = 'warning'; displayNode.set = [...displayParent.set]; displayNode.set.push({ fieldName: field, fieldValue: value }); displayNode.docCount = docCount; @@ -130,8 +93,6 @@ function dfDepthFirstSearch( while (true) { const nextFieldIndex = fields.indexOf(field) + 1; if (nextFieldIndex >= fields.length) { - displayNode.icon = 'file'; - displayNode.iconStyle = 'info'; return docCount; } nextField = fields[nextFieldIndex]; @@ -147,7 +108,6 @@ function dfDepthFirstSearch( displayNode.name += ` '*'`; label += ` '*'`; const nextDisplayNode = NewNodeFactory(`${docCount}/${totalDocCount}${label}`); - nextDisplayNode.iconStyle = 'warning'; nextDisplayNode.set = displayNode.set; nextDisplayNode.docCount = docCount; nextDisplayNode.pValue = pValue; @@ -194,12 +154,6 @@ export function getSimpleHierarchicalTree( displayOther: boolean, fields: string[] = [] ) { - // const candidates = uniq( - // df.flatMap((d) => - // Object.keys(omit(d, ['size', 'maxPValue', 'doc_count', 'support', 'total_doc_count'])) - // ) - // ); - const field = fields[0]; const totalDocCount = Math.max(...df.map((d) => d.total_doc_count)); @@ -222,70 +176,3 @@ export function 
getSimpleHierarchicalTree( return { root: newRoot, fields }; } - -/** - * Get leaves from hierarchical tree. - */ -export function getSimpleHierarchicalTreeLeaves( - tree: NewNode, - leaves: ChangePointGroup[], - level = 1 -) { - if (tree.children.length === 0) { - leaves.push({ - id: `${stringHash(JSON.stringify(tree.set))}`, - group: tree.set, - docCount: tree.docCount, - pValue: tree.pValue, - }); - } else { - for (const child of tree.children) { - const newLeaves = getSimpleHierarchicalTreeLeaves(child, [], level + 1); - if (newLeaves.length > 0) { - leaves.push(...newLeaves); - } - } - } - - if (leaves.length === 1 && leaves[0].group.length === 0 && leaves[0].docCount === 0) { - return []; - } - - return leaves; -} - -type FieldValuePairCounts = Record>; -/** - * Get a nested record of field/value pairs with counts - */ -export function getFieldValuePairCounts(cpgs: ChangePointGroup[]): FieldValuePairCounts { - return cpgs.reduce((p, { group }) => { - group.forEach(({ fieldName, fieldValue }) => { - if (p[fieldName] === undefined) { - p[fieldName] = {}; - } - p[fieldName][fieldValue] = p[fieldName][fieldValue] ? p[fieldName][fieldValue] + 1 : 1; - }); - return p; - }, {}); -} - -/** - * Analyse duplicate field/value pairs in change point groups. 
- */ -export function markDuplicates( - cpgs: ChangePointGroup[], - fieldValuePairCounts: FieldValuePairCounts -): ChangePointGroup[] { - return cpgs.map((cpg) => { - return { - ...cpg, - group: cpg.group.map((g) => { - return { - ...g, - duplicate: fieldValuePairCounts[g.fieldName][g.fieldValue] > 1, - }; - }), - }; - }); -} diff --git a/x-pack/plugins/aiops/server/routes/queries/get_simple_hierarchical_tree_leaves.test.ts b/x-pack/plugins/aiops/server/routes/queries/get_simple_hierarchical_tree_leaves.test.ts new file mode 100644 index 00000000000000..9567d38f3d402a --- /dev/null +++ b/x-pack/plugins/aiops/server/routes/queries/get_simple_hierarchical_tree_leaves.test.ts @@ -0,0 +1,35 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import { fields } from '../../../common/__mocks__/artificial_logs/fields'; +import { filteredFrequentItems } from '../../../common/__mocks__/artificial_logs/filtered_frequent_items'; + +import { getSimpleHierarchicalTree } from './get_simple_hierarchical_tree'; +import { getSimpleHierarchicalTreeLeaves } from './get_simple_hierarchical_tree_leaves'; + +describe('getSimpleHierarchicalTreeLeaves', () => { + it('returns the hierarchical tree leaves', () => { + const simpleHierarchicalTree = getSimpleHierarchicalTree( + filteredFrequentItems, + true, + false, + fields + ); + const leaves = getSimpleHierarchicalTreeLeaves(simpleHierarchicalTree.root, []); + expect(leaves).toEqual([ + { + id: '2038579476', + group: [ + { fieldName: 'response_code', fieldValue: '500' }, + { fieldName: 'url', fieldValue: 'home.php' }, + ], + docCount: 792, + pValue: 0.010770456205312423, + }, + ]); + }); +}); diff --git a/x-pack/plugins/aiops/server/routes/queries/get_simple_hierarchical_tree_leaves.ts 
/**
 * Collects the leaves of a hierarchical tree as change point groups.
 *
 * A node without children is appended to `leaves` as one group: `id` is the
 * string hash of the JSON-serialized `set`, `group` is the `set` itself, and
 * `docCount`/`pValue` are copied from the node. For a node with children, each
 * child is processed recursively with a fresh array and whatever that call
 * returns is appended to `leaves`.
 *
 * @param tree - Node to collect leaves from.
 * @param leaves - Array the leaves are appended to; it is mutated in place.
 * @param level - Depth of `tree`, incremented on each recursive call. It is
 *   only passed along and does not influence the result.
 * @returns `leaves`, or a new empty array when `leaves` ends up holding a
 *   single entry with an empty `group` and a `docCount` of 0. In that case
 *   the entry is still present in the array that was passed in.
 */
export function getSimpleHierarchicalTreeLeaves(
  tree: SimpleHierarchicalTreeNode,
  leaves: ChangePointGroup[],
  level = 1
) {
  const { children } = tree;

  if (children.length > 0) {
    children.forEach((child) => {
      // An empty result spreads to nothing, so such a subtree adds no entries.
      leaves.push(...getSimpleHierarchicalTreeLeaves(child, [], level + 1));
    });
  } else {
    const { set, docCount, pValue } = tree;
    leaves.push({
      id: `${stringHash(JSON.stringify(set))}`,
      group: set,
      docCount,
      pValue,
    });
  }

  const isLoneEmptyGroup =
    leaves.length === 1 && leaves[0].group.length === 0 && leaves[0].docCount === 0;

  return isLoneEmptyGroup ? [] : leaves;
}
/**
 * Counts how often each value of `field` occurs across the item sets in `df`.
 *
 * Item sets whose `set` has no entry for `field` are skipped.
 *
 * @param df - Item sets to inspect; only `set[field]` of each entry is read.
 * @param field - Name of the field whose values are counted.
 * @returns A plain object mapping each value to its number of occurrences.
 */
export function getValueCounts(df: ItemsetResult[], field: string): Record<string, number> {
  // Tally in a Map rather than on a plain object: values such as 'constructor'
  // or '__proto__' would otherwise hit members inherited from Object.prototype
  // and produce a wrong count or be dropped altogether.
  const counts = new Map<string, number>();

  for (const { set } of df) {
    const value = set[field];
    if (value === undefined) {
      continue;
    }

    // Object keys are strings, so normalise here to keep the same buckets a
    // plain-object tally would have used.
    const key = String(value);
    counts.set(key, (counts.get(key) ?? 0) + 1);
  }

  // Object.fromEntries defines own properties, so every key lands on the
  // result itself.
  return Object.fromEntries(counts);
}
/**
 * Checks the order of the values returned by getValuesDescending for several
 * fields of the artificial-logs frequent-items fixture.
 */
describe('getValuesDescending', () => {
  // Each case pairs a field name with the values expected for it, in order.
  const cases: Array<[string, string[]]> = [
    ['response_code', ['500', '200', '404']],
    ['url', ['home.php']],
    ['user', ['Peter', 'Mary', 'Paul']],
  ];

  for (const [field, expected] of cases) {
    it(`get descending values for field ${field}`, () => {
      expect(getValuesDescending(frequentItems, field)).toEqual(expected);
    });
  }
});
/**
 * Lists the values counted for `field`, ordered by count, highest first.
 *
 * The counts come from `getValueCounts(df, field)`. Values with equal counts
 * keep the relative order in which `Object.keys` reports them, because
 * `Array.prototype.sort` is stable.
 *
 * @param df - Item sets forwarded to `getValueCounts`.
 * @param field - Name of the field whose values are listed.
 * @returns The keys of the count record, sorted by descending count.
 */
export function getValuesDescending(df: ItemsetResult[], field: string): string[] {
  const counts = getValueCounts(df, field);
  const byCountDescending = (left: string, right: string) => counts[right] - counts[left];

  // Object.keys returns a fresh array, so sorting it in place touches nothing shared.
  return Object.keys(counts).sort(byCountDescending);
}
g.group.length > 1 + ); + + const fieldValuePairCounts = getFieldValuePairCounts(changePointGroups); + const markedDuplicates = getMarkedDuplicates(changePointGroups, fieldValuePairCounts); + const groupsWithReaddedDuplicates = getGroupsWithReaddedDuplicates( + markedDuplicates, + groupedChangePoints + ); + + const missingChangePoints = getMissingChangePoints( + deduplicatedChangePoints, + groupsWithReaddedDuplicates + ); + + const transformed = transformChangePointToGroup(missingChangePoints[0], groupedChangePoints); + + expect(transformed).toEqual({ + docCount: 1981, + group: [{ duplicate: false, fieldName: 'user', fieldValue: 'Peter' }], + id: '817080373', + pValue: 2.7454255728359757e-21, + }); + }); +}); diff --git a/x-pack/plugins/aiops/server/routes/queries/transform_change_point_to_group.ts b/x-pack/plugins/aiops/server/routes/queries/transform_change_point_to_group.ts new file mode 100644 index 00000000000000..8e6c77971dcee4 --- /dev/null +++ b/x-pack/plugins/aiops/server/routes/queries/transform_change_point_to_group.ts @@ -0,0 +1,55 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
/**
 * Wraps a single change point into a group-shaped object.
 *
 * If the change point's field/value pair is a member of one of the
 * `groupedChangePoints`, the result contains every pair of that group;
 * otherwise it contains only the change point's own pair. Every pair in the
 * result carries `duplicate: false`.
 *
 * @param changePoint - Change point to transform; `fieldName`, `fieldValue`,
 *   `doc_count` and `pValue` are read from it.
 * @param groupedChangePoints - Groups searched for the change point's pair.
 * @returns An object with `id`, `group`, `docCount` and `pValue`. `docCount`
 *   and `pValue` are taken from the change point. `id` is the string hash of
 *   the JSON-serialized pairs: an array of pairs when a group matched, the
 *   single pair object when none did.
 */
export function transformChangePointToGroup(
  changePoint: ChangePoint,
  groupedChangePoints: ChangePointDuplicateGroup[]
) {
  const { fieldName, fieldValue, doc_count: docCount, pValue } = changePoint;

  const matchingGroup = groupedChangePoints.find(({ group }) =>
    group.some((member) => member.fieldName === fieldName && member.fieldValue === fieldValue)
  );

  if (matchingGroup === undefined) {
    // No group matched: the id hashes the bare pair object, not a one-element array.
    const pair = { fieldName, fieldValue };

    return {
      id: `${stringHash(JSON.stringify(pair))}`,
      group: [{ ...pair, duplicate: false }],
      docCount,
      pValue,
    };
  }

  // Reduce the members to plain field/value pairs once; the same pairs feed
  // both the id hash and the returned group.
  const pairs = matchingGroup.group.map((member) => ({
    fieldName: member.fieldName,
    fieldValue: member.fieldValue,
  }));

  return {
    id: `${stringHash(JSON.stringify(pairs))}`,
    group: pairs.map((pair) => ({ ...pair, duplicate: false })),
    docCount,
    pValue,
  };
}
+import { changePoints as artificialLogChangePoints } from '@kbn/aiops-plugin/common/__mocks__/artificial_logs/change_points'; +import { finalChangePointGroups as artificialLogsChangePointGroups } from '@kbn/aiops-plugin/common/__mocks__/artificial_logs/final_change_point_groups'; + import type { TestData } from './types'; export const explainLogRateSpikesTestData: TestData[] = [ @@ -86,70 +92,8 @@ export const explainLogRateSpikesTestData: TestData[] = [ groupHistogramFilter: 'add_change_point_group_histogram', histogramFilter: 'add_change_points_histogram', errorFilter: 'add_error', - changePoints: [ - { - fieldName: 'response_code', - fieldValue: '500', - doc_count: 1821, - bg_count: 553, - total_doc_count: 4671, - total_bg_count: 1975, - score: 26.546201745993947, - pValue: 2.9589053032077285e-12, - normalizedScore: 0.7814127409489161, - }, - { - fieldName: 'url', - fieldValue: 'home.php', - doc_count: 1742, - bg_count: 632, - total_doc_count: 4671, - total_bg_count: 1975, - score: 4.53094842981472, - pValue: 0.010770456205312423, - normalizedScore: 0.10333028878375965, - }, - { - fieldName: 'url', - fieldValue: 'login.php', - doc_count: 1742, - bg_count: 632, - total_doc_count: 4671, - total_bg_count: 1975, - score: 4.53094842981472, - pValue: 0.010770456205312423, - normalizedScore: 0.10333028878375965, - }, - { - fieldName: 'user', - fieldValue: 'Peter', - doc_count: 1981, - bg_count: 553, - total_doc_count: 4671, - total_bg_count: 1975, - score: 47.34435085428873, - pValue: 2.7454255728359757e-21, - normalizedScore: 0.8327337555873047, - }, - ], - groups: [ - { - id: '2038579476', - group: [ - { fieldName: 'response_code', fieldValue: '500', duplicate: false }, - { fieldName: 'url', fieldValue: 'home.php', duplicate: false }, - { fieldName: 'url', fieldValue: 'login.php', duplicate: false }, - ], - docCount: 792, - pValue: 0.010770456205312423, - }, - { - id: '817080373', - group: [{ fieldName: 'user', fieldValue: 'Peter', duplicate: false }], - docCount: 
1981, - pValue: 2.7454255728359757e-21, - }, - ], + changePoints: artificialLogChangePoints, + groups: artificialLogsChangePointGroups, histogramLength: 20, }, },