Skip to content

Commit

Permalink
[ML] AIOps: Support text fields in log rate analysis (#165124)
Browse files Browse the repository at this point in the history
Part of #167467.

Adds support for text fields in log rate analysis. Text fields will
be analysed using log categorization, similar to log pattern analysis.
Significant log patterns will be identified using the `chi2test`
package, similar to how we detect data drifts.
  • Loading branch information
walterra committed Oct 4, 2023
1 parent dfd35c6 commit d8886d8
Show file tree
Hide file tree
Showing 42 changed files with 1,307 additions and 147 deletions.
2 changes: 2 additions & 0 deletions x-pack/packages/ml/agg_utils/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ export type {
} from './src/fetch_histograms_for_fields';
export { isMultiBucketAggregate } from './src/is_multi_bucket_aggregate';
export { isSignificantTerm } from './src/type_guards';
export { SIGNIFICANT_TERM_TYPE } from './src/types';
export type {
AggCardinality,
SignificantTerm,
Expand All @@ -27,6 +28,7 @@ export type {
SignificantTermGroupHistogram,
SignificantTermHistogram,
SignificantTermHistogramItem,
SignificantTermType,
HistogramField,
NumericColumnStats,
NumericColumnStatsMap,
Expand Down
2 changes: 2 additions & 0 deletions x-pack/packages/ml/agg_utils/src/type_guards.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ describe('isSignificantTerm', () => {
expect(isSignificantTerm({ fieldValue: '500' })).toBeFalsy();
expect(
isSignificantTerm({
key: 'response_code:500',
type: 'keyword',
fieldName: 'response_code',
fieldValue: '500',
doc_count: 1819,
Expand Down
2 changes: 2 additions & 0 deletions x-pack/packages/ml/agg_utils/src/type_guards.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ import type { SignificantTerm } from './types';
*/
export function isSignificantTerm(arg: unknown): arg is SignificantTerm {
return isPopulatedObject(arg, [
'key',
'type',
'fieldName',
'fieldValue',
'doc_count',
Expand Down
30 changes: 30 additions & 0 deletions x-pack/packages/ml/agg_utils/src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,24 @@ export interface HistogramField {
type: KBN_FIELD_TYPES;
}

/**
* Enumeration of significant term types.
*
* Declared as an `as const` object rather than an `enum`, so each property
* keeps its string literal type (`'keyword'`, `'log_pattern'`) and the union of
* allowed values can be derived from the object itself.
*
* NOTE(review): `LOG_PATTERN` presumably marks terms that come from log
* categorization of text fields, while `KEYWORD` marks plain field/value
* pairs — confirm against the code that produces these terms.
*/
export const SIGNIFICANT_TERM_TYPE = {
KEYWORD: 'keyword',
LOG_PATTERN: 'log_pattern',
} as const;

/**
* Type for significant term type keys.
*
* Resolves to the union of the property names of `SIGNIFICANT_TERM_TYPE`,
* i.e. `'KEYWORD' | 'LOG_PATTERN'`. Not exported; it only serves as the index
* type used to look up the corresponding values.
*/
type SignificantTermTypeKeys = keyof typeof SIGNIFICANT_TERM_TYPE;

/**
* Represents the type of significant term as determined by the SIGNIFICANT_TERM_TYPE enumeration.
*
* Resolves to the union of the object's values, i.e. `'keyword' | 'log_pattern'`,
* so adding an entry to `SIGNIFICANT_TERM_TYPE` widens this type automatically.
*/
export type SignificantTermType = typeof SIGNIFICANT_TERM_TYPE[SignificantTermTypeKeys];

/**
* Represents significant term metadata for a field/value pair.
* This interface is used as a custom type within Log Rate Analysis
Expand All @@ -97,6 +115,12 @@ export interface HistogramField {
* @extends FieldValuePair
*/
export interface SignificantTerm extends FieldValuePair {
/** The key associated with the significant term. */
key: string;

/** The type of the significant term. */
type: SignificantTermType;

/** The document count for the significant term. */
doc_count: number;

Expand Down Expand Up @@ -169,6 +193,12 @@ export interface SignificantTermGroupHistogram {
* @interface
*/
export interface SignificantTermGroupItem extends FieldValuePair {
/** The key associated with the significant term. */
key: string;

/** The type of the significant term. */
type: SignificantTermType;

/** The document count associated with this item. */
docCount: number;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,84 +12,100 @@ export const finalSignificantTermGroups: SignificantTermGroup[] = [
docCount: 632,
group: [
{
docCount: 790,
duplicate: 2,
key: 'url:login.php',
type: 'keyword',
fieldName: 'url',
fieldValue: 'login.php',
docCount: 790,
duplicate: 2,
pValue: 0.012783309213417932,
},
{
docCount: 632,
duplicate: 2,
key: 'user:Peter',
type: 'keyword',
fieldName: 'user',
fieldValue: 'Peter',
docCount: 632,
duplicate: 2,
pValue: 0.012783309213417932,
},
],
id: '1982924514',
id: '1937394803',
pValue: 0.012783309213417932,
},
{
docCount: 792,
group: [
{
docCount: 792,
duplicate: 2,
key: 'response_code:500',
type: 'keyword',
fieldName: 'response_code',
fieldValue: '500',
docCount: 792,
duplicate: 2,
pValue: 0.012783309213417932,
},
{
docCount: 792,
duplicate: 2,
key: 'url:home.php',
type: 'keyword',
fieldName: 'url',
fieldValue: 'home.php',
docCount: 792,
duplicate: 2,
pValue: 0.00974308761016614,
},
],
id: '2052830342',
id: '2675980076',
pValue: 0.00974308761016614,
},
{
docCount: 790,
group: [
{
docCount: 792,
duplicate: 2,
key: 'response_code:500',
type: 'keyword',
fieldName: 'response_code',
fieldValue: '500',
docCount: 792,
duplicate: 2,
pValue: 0.012783309213417932,
},
{
docCount: 790,
duplicate: 2,
key: 'url:login.php',
type: 'keyword',
fieldName: 'url',
fieldValue: 'login.php',
docCount: 790,
duplicate: 2,
pValue: 0.012783309213417932,
},
],
id: '3851735068',
id: '3819687732',
pValue: 0.012783309213417932,
},
{
docCount: 636,
group: [
{
docCount: 792,
duplicate: 2,
key: 'url:home.php',
type: 'keyword',
fieldName: 'url',
fieldValue: 'home.php',
docCount: 792,
duplicate: 2,
pValue: 0.00974308761016614,
},
{
docCount: 636,
duplicate: 2,
key: 'user:Peter',
type: 'keyword',
fieldName: 'user',
fieldValue: 'Peter',
docCount: 636,
duplicate: 2,
pValue: 0.00974308761016614,
},
],
id: '92732022',
id: '2091742187',
pValue: 0.00974308761016614,
},
];
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,21 @@ export const significantTermGroups: SignificantTermGroup[] = [
id: '2038579476',
group: [
{
key: 'response_code:500',
type: 'keyword',
fieldName: 'response_code',
fieldValue: '500',
docCount: 1819,
pValue: 2.9589053032077285e-12,
},
{ fieldName: 'url', fieldValue: 'home.php', docCount: 1744, pValue: 0.010770456205312423 },
{
key: 'url:home.php',
type: 'keyword',
fieldName: 'url',
fieldValue: 'home.php',
docCount: 1744,
pValue: 0.010770456205312423,
},
],
docCount: 792,
pValue: 0.010770456205312423,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,12 @@
* 2.0.
*/

export const significantTerms = [
import type { SignificantTerm } from '@kbn/ml-agg-utils';

export const significantTerms: SignificantTerm[] = [
{
key: 'user:Peter',
type: 'keyword',
fieldName: 'user',
fieldValue: 'Peter',
doc_count: 1981,
Expand All @@ -18,6 +22,8 @@ export const significantTerms = [
normalizedScore: 0.8328439168064725,
},
{
key: 'response_code:500',
type: 'keyword',
fieldName: 'response_code',
fieldValue: '500',
doc_count: 1819,
Expand All @@ -29,6 +35,8 @@ export const significantTerms = [
normalizedScore: 0.7809229492301661,
},
{
key: 'url:home.php',
type: 'keyword',
fieldName: 'url',
fieldValue: 'home.php',
doc_count: 1744,
Expand All @@ -40,6 +48,8 @@ export const significantTerms = [
normalizedScore: 0.12006631193078789,
},
{
key: 'url:login.php',
type: 'keyword',
fieldName: 'url',
fieldValue: 'login.php',
doc_count: 1738,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,16 @@ export const significantTermGroups: SignificantTermGroup[] = [
id: 'group-1',
group: [
{
key: 'custom_field.keyword:deviation',
type: 'keyword',
fieldName: 'custom_field.keyword',
fieldValue: 'deviation',
docCount: 101,
pValue: 0.01,
},
{
key: 'airline:UAL',
type: 'keyword',
fieldName: 'airline',
fieldValue: 'UAL',
docCount: 101,
Expand All @@ -31,12 +35,16 @@ export const significantTermGroups: SignificantTermGroup[] = [
id: 'group-2',
group: [
{
key: 'custom_field.keyword:deviation',
type: 'keyword',
fieldName: 'custom_field.keyword',
fieldValue: 'deviation',
docCount: 49,
pValue: 0.001,
},
{
key: 'airline:AAL',
type: 'keyword',
fieldName: 'airline',
fieldValue: 'AAL',
docCount: 49,
Expand Down
2 changes: 2 additions & 0 deletions x-pack/plugins/aiops/common/api/stream_reducer.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ describe('streamReducer', () => {
initialState,
addSignificantTermsAction([
{
key: 'the-field-name:the-field-value',
type: 'keyword',
fieldName: 'the-field-name',
fieldValue: 'the-field-value',
doc_count: 10,
Expand Down
4 changes: 3 additions & 1 deletion x-pack/plugins/aiops/common/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
* 2.0.
*/

import type { SignificantTerm, FieldValuePair } from '@kbn/ml-agg-utils';
import type { SignificantTerm, SignificantTermType, FieldValuePair } from '@kbn/ml-agg-utils';

export interface SignificantTermDuplicateGroup {
keys: Pick<SignificantTerm, keyof SignificantTerm>;
Expand All @@ -24,6 +24,8 @@ export interface ItemsetResult {
}

/**
* A single field/value entry (inherited from `FieldValuePair`) extended with
* the identifying key, the term type and the statistics listed below.
*
* NOTE(review): the consumers of this interface are not visible in this
* excerpt; the name suggests it describes the items attached to a node of the
* simple hierarchical tree — confirm.
*/
interface SimpleHierarchicalTreeNodeSet extends FieldValuePair {
/**
* The key associated with the entry.
* NOTE(review): fixtures in this change use the form `fieldName:fieldValue`
* for keyword terms — confirm whether that holds for every term type.
*/
key: string;
/** The type of the significant term (one of the `SIGNIFICANT_TERM_TYPE` values). */
type: SignificantTermType;
/** The document count associated with this entry. */
docCount: number;
/** The p-value of the entry; `null` is allowed (TODO confirm when it is unset). */
pValue: number | null;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ import type { GroupTableItem } from '../../components/log_rate_analysis_results_
import { buildExtendedBaseFilterCriteria } from './build_extended_base_filter_criteria';

const selectedSignificantTermMock: SignificantTerm = {
key: 'meta.cloud.instance_id.keyword:1234',
type: 'keyword',
doc_count: 53408,
bg_count: 1154,
fieldName: 'meta.cloud.instance_id.keyword',
Expand All @@ -29,22 +31,54 @@ const selectedGroupMock: GroupTableItem = {
pValue: 2.2250738585072626e-308,
uniqueItemsCount: 3,
groupItemsSortedByUniqueness: [
{ fieldName: 'error.message', fieldValue: 'rate limit exceeded', docCount: 10, pValue: 0.05 },
{ fieldName: 'message', fieldValue: 'too many requests', docCount: 10, pValue: 0.05 },
{
key: 'error.message:rate limit exceeded',
type: 'keyword',
fieldName: 'error.message',
fieldValue: 'rate limit exceeded',
docCount: 10,
pValue: 0.05,
},
{
key: 'message:too many requests',
type: 'keyword',
fieldName: 'message',
fieldValue: 'too many requests',
docCount: 10,
pValue: 0.05,
},
{
key: 'user_agent.original.keyword:Mozilla/5.0',
type: 'keyword',
fieldName: 'user_agent.original.keyword',
fieldValue: 'Mozilla/5.0',
docCount: 10,
pValue: 0.05,
},
{
key: 'beat.hostname.keyword:ip-192-168-1-1',
type: 'keyword',
fieldName: 'beat.hostname.keyword',
fieldValue: 'ip-192-168-1-1',
docCount: 10,
pValue: 0.05,
},
{ fieldName: 'beat.name.keyword', fieldValue: 'i-1234', docCount: 10, pValue: 0.05 },
{ fieldName: 'docker.container.id.keyword', fieldValue: 'asdf', docCount: 10, pValue: 0.05 },
{
key: 'beat.name.keyword:i-1234',
type: 'keyword',
fieldName: 'beat.name.keyword',
fieldValue: 'i-1234',
docCount: 10,
pValue: 0.05,
},
{
key: 'docker.container.id.keyword:asdf',
type: 'keyword',
fieldName: 'docker.container.id.keyword',
fieldValue: 'asdf',
docCount: 10,
pValue: 0.05,
},
],
histogram: [],
};
Expand Down
Loading

0 comments on commit d8886d8

Please sign in to comment.