-
Notifications
You must be signed in to change notification settings - Fork 8.1k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
25 changed files
with
1,709 additions
and
1,254 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
# @kbn/ml-chi2test | ||
|
||
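Utilities for a chi-squared comparison of two histograms: `computeChi2PValue` and `criticalTableLookup`, plus the `CRITICAL_VALUES_TABLE` and `SIGNIFICANCE_LEVELS` constants and the `Histogram` type. (Package scaffold generated by @kbn/generate.)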
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
/* | ||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
* or more contributor license agreements. Licensed under the Elastic License | ||
* 2.0; you may not use this file except in compliance with the Elastic License | ||
* 2.0. | ||
*/ | ||
|
||
import { criticalTableLookup } from './critical_table_lookup'; | ||
import type { Histogram } from './types'; | ||
|
||
/**
 * Compute the chi-squared p-value describing how similar two term
 * distributions are.
 *
 * The returned value comes from `criticalTableLookup` and is documented to
 * range from 0 to 1, with 1 meaning the datasets are identical.
 *
 * Notes on behavior:
 * - Term keys are compared by their string form, so a numeric key `1` and a
 *   string key `'1'` are treated as the same category.
 * - Only the first 100 distinct keys (baseline keys first, then drifted keys)
 *   take part in the test.
 * - A term that is missing from one side, or has no `percentage`, counts as 0.
 * - When fewer than two distinct keys exist there are no degrees of freedom
 *   and 1 is returned.
 *
 * @param normalizedBaselineTerms terms of the baseline (expected) distribution
 * @param normalizedDriftedTerms terms of the drifted (observed) distribution
 * @returns the significance level looked up for the computed statistic
 */
export const computeChi2PValue = (
  normalizedBaselineTerms: Histogram[],
  normalizedDriftedTerms: Histogram[]
) => {
  // Upper bound on the number of distinct keys taken into account.
  const MAX_KEYS = 100;
  // Substitute for an expected value of 0, to prevent a divide by zero.
  const ZERO_EXPECTED_FALLBACK = 1e-6;

  // Index percentages by the *string* form of the key. Comparing the raw
  // `string | number` key against a stringified key with `===` never matches
  // for numeric keys, which would silently yield a statistic of 0.
  const indexByKey = (terms: Histogram[]) => {
    const percentageByKey = new Map<string, number>();
    for (const term of terms) {
      const key = term.key.toString();
      // Keep the first occurrence of a key, like `Array.prototype.find` would.
      if (!percentageByKey.has(key)) {
        percentageByKey.set(key, term.percentage ?? 0);
      }
    }
    return percentageByKey;
  };

  const baselineByKey = indexByKey(normalizedBaselineTerms);
  const driftedByKey = indexByKey(normalizedDriftedTerms);

  // Get all unique keys from both arrays
  const allKeys = Array.from(new Set([...baselineByKey.keys(), ...driftedByKey.keys()])).slice(
    0,
    MAX_KEYS
  );

  const degreesOfFreedom = allKeys.length - 1;

  // Zero or one category: nothing to compare.
  if (degreesOfFreedom < 1) return 1;

  // Calculate the chi-squared statistic
  let chiSquared = 0;
  for (const key of allKeys) {
    const observed = driftedByKey.get(key) ?? 0;
    const expected = baselineByKey.get(key) ?? 0;
    chiSquared +=
      Math.pow(observed - expected, 2) / (expected > 0 ? expected : ZERO_EXPECTED_FALLBACK);
  }

  return criticalTableLookup(chiSquared, degreesOfFreedom);
};
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
/* | ||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
* or more contributor license agreements. Licensed under the Elastic License | ||
* 2.0; you may not use this file except in compliance with the Elastic License | ||
* 2.0. | ||
*/ | ||
|
||
import { CRITICAL_VALUES_TABLE, SIGNIFICANCE_LEVELS } from './constants'; | ||
|
||
/**
 * Look up the significance level for a chi-squared statistic.
 *
 * The row of `CRITICAL_VALUES_TABLE` is selected by the degrees of freedom
 * (`df - 1`); within that row the column whose critical value is closest to
 * `chi2Statistic` is chosen (the first one wins on ties) and the entry of
 * `SIGNIFICANCE_LEVELS` at that column is returned.
 *
 * @param chi2Statistic the chi-squared statistic to look up
 * @param df degrees of freedom; values below 1 short-circuit to a result of 1
 * @returns the significance level at the closest critical value, or 1 if `df < 1`
 * @throws Error if `df` is not an integer
 * @throws RangeError if `df` is larger than the number of rows in the table
 */
export const criticalTableLookup = (chi2Statistic: number, df: number) => {
  if (df < 1) return 1;
  if (!Number.isInteger(df)) throw Error('Degrees of freedom must be a valid integer');

  // Get the row for the given degrees of freedom
  const criticalValues = CRITICAL_VALUES_TABLE[df - 1];
  if (criticalValues === undefined) {
    // Fail with an explicit message instead of a TypeError on `undefined[0]`.
    throw new RangeError(
      `Degrees of freedom (${df}) exceed the ${CRITICAL_VALUES_TABLE.length} rows of the critical values table`
    );
  }

  // Get the column index of the closest critical value
  let columnIndex = 0;
  let minDiff = Math.abs(criticalValues[0] - chi2Statistic);
  for (let j = 1; j < criticalValues.length; j++) {
    const diff = Math.abs(criticalValues[j] - chi2Statistic);
    if (diff < minDiff) {
      minDiff = diff;
      columnIndex = j;
    }
  }

  return SIGNIFICANCE_LEVELS[columnIndex];
};
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
/* | ||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
* or more contributor license agreements. Licensed under the Elastic License | ||
* 2.0; you may not use this file except in compliance with the Elastic License | ||
* 2.0. | ||
*/ | ||
|
||
export { computeChi2PValue } from './compute_chi_2_pvalue'; | ||
export { criticalTableLookup } from './critical_table_lookup'; | ||
export { CRITICAL_VALUES_TABLE, SIGNIFICANCE_LEVELS } from './constants'; | ||
export type { Histogram } from './types'; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
/* | ||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
* or more contributor license agreements. Licensed under the Elastic License | ||
* 2.0; you may not use this file except in compliance with the Elastic License | ||
* 2.0. | ||
*/ | ||
|
||
module.exports = { | ||
preset: '@kbn/test', | ||
rootDir: '../../../..', | ||
roots: ['<rootDir>/x-pack/packages/ml/chi2test'], | ||
}; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
{ | ||
"type": "shared-common", | ||
"id": "@kbn/ml-chi2test", | ||
"owner": "@elastic/ml-ui" | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
{ | ||
"name": "@kbn/ml-chi2test", | ||
"private": true, | ||
"version": "1.0.0", | ||
"license": "SSPL-1.0 OR Elastic License 2.0" | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
{ | ||
"extends": "../../../../tsconfig.base.json", | ||
"compilerOptions": { | ||
"outDir": "target/types", | ||
"types": [ | ||
"jest", | ||
"node", | ||
"react" | ||
] | ||
}, | ||
"include": [ | ||
"**/*.ts", | ||
"**/*.tsx", | ||
], | ||
"exclude": [ | ||
"target/**/*" | ||
], | ||
"kbn_references": [] | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
/* | ||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
* or more contributor license agreements. Licensed under the Elastic License | ||
* 2.0; you may not use this file except in compliance with the Elastic License | ||
* 2.0. | ||
*/ | ||
|
||
/**
 * A single histogram term as consumed by the chi-squared utilities.
 */
export interface Histogram {
  /** Term key; may be a string or a number. */
  key: string | number;
  /** Document count for the term. */
  doc_count: number;
  /** Optional share of the term within its dataset. */
  percentage?: number;
}
48 changes: 48 additions & 0 deletions
48
x-pack/plugins/aiops/common/api/log_categorization/create_categorize_query.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
/* | ||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
* or more contributor license agreements. Licensed under the Elastic License | ||
* 2.0; you may not use this file except in compliance with the Elastic License | ||
* 2.0. | ||
*/ | ||
|
||
import { cloneDeep } from 'lodash'; | ||
|
||
import type { QueryDslQueryContainer } from '@elastic/elasticsearch/lib/api/types'; | ||
|
||
/**
 * Build the query used for log categorization from a user supplied query.
 *
 * The input is deep-cloned and never mutated. On the clone:
 * - a `bool` clause is created if missing;
 * - `bool.must` is normalised to an array (a single clause object is wrapped,
 *   since `.push` on a non-array would throw);
 * - if `bool.must` was missing, a top-level `match_all` is moved into it;
 * - a top-level `multi_match` is moved to `bool.should` (replacing any
 *   existing `should`);
 * - a range filter on `timeField` (`gte: from`, `lte: to`, `epoch_millis`)
 *   is appended to `bool.must`.
 *
 * @param queryIn the original query
 * @param timeField name of the time field to range-filter on
 * @param from lower time bound, passed through as `gte`
 * @param to upper time bound, passed through as `lte`
 * @returns the adjusted clone of `queryIn`
 */
export function createCategorizeQuery(
  queryIn: QueryDslQueryContainer,
  timeField: string,
  from: number | undefined,
  to: number | undefined
) {
  const query = cloneDeep(queryIn);

  if (query.bool === undefined) {
    query.bool = {};
  }

  let must: QueryDslQueryContainer[];
  if (query.bool.must === undefined) {
    must = [];
    if (query.match_all !== undefined) {
      must.push({ match_all: query.match_all });
      delete query.match_all;
    }
  } else if (Array.isArray(query.bool.must)) {
    must = query.bool.must;
  } else {
    // `must` may be a single clause object; wrap it so we can append to it.
    must = [query.bool.must];
  }
  query.bool.must = must;

  if (query.multi_match !== undefined) {
    query.bool.should = {
      multi_match: query.multi_match,
    };
    delete query.multi_match;
  }

  must.push({
    range: {
      [timeField]: {
        gte: from,
        lte: to,
        format: 'epoch_millis',
      },
    },
  });

  return query;
}
66 changes: 66 additions & 0 deletions
66
x-pack/plugins/aiops/common/api/log_categorization/create_category_request.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
/* | ||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
* or more contributor license agreements. Licensed under the Elastic License | ||
* 2.0; you may not use this file except in compliance with the Elastic License | ||
* 2.0. | ||
*/ | ||
|
||
import type { QueryDslQueryContainer } from '@elastic/elasticsearch/lib/api/types'; | ||
|
||
import { createRandomSamplerWrapper } from '@kbn/ml-random-sampler-utils'; | ||
|
||
import { createCategorizeQuery } from './create_categorize_query'; | ||
|
||
// `size` passed to the categorize_text aggregation.
const CATEGORY_LIMIT = 1000;
// `size` passed to the top_hits sub-aggregation.
const EXAMPLE_LIMIT = 1;

/**
 * Assemble the search request for the `categorize_text` aggregation.
 *
 * The query is produced by `createCategorizeQuery`. The aggregation contains a
 * `top_hits` sub-aggregation sorted by `timeField` and, only when `intervalMs`
 * is truthy, a `date_histogram` named `sparkline` with a fixed interval of
 * `intervalMs` milliseconds. The aggregations are passed through `wrap`
 * before being placed into the request body.
 *
 * @param index index (pattern) to search
 * @param field field to categorize; also used as `_source` for the top hits
 * @param timeField time field used for the range filter, sorting and sparkline
 * @param from lower time bound forwarded to `createCategorizeQuery`
 * @param to upper time bound forwarded to `createCategorizeQuery`
 * @param queryIn base query forwarded to `createCategorizeQuery`
 * @param wrap random sampler wrapper applied to the aggregations
 * @param intervalMs optional sparkline bucket interval in milliseconds
 * @returns an object of the form `{ params: { index, size: 0, body } }`
 */
export function createCategoryRequest(
  index: string,
  field: string,
  timeField: string,
  from: number | undefined,
  to: number | undefined,
  queryIn: QueryDslQueryContainer,
  wrap: ReturnType<typeof createRandomSamplerWrapper>['wrap'],
  intervalMs?: number
) {
  const query = createCategorizeQuery(queryIn, timeField, from, to);

  // Optional per-category sparkline, only requested when an interval is given.
  const sparklineAgg = intervalMs
    ? {
        sparkline: {
          date_histogram: {
            field: timeField,
            fixed_interval: `${intervalMs}ms`,
          },
        },
      }
    : {};

  const exampleHitAgg = {
    top_hits: {
      size: EXAMPLE_LIMIT,
      sort: [timeField],
      _source: field,
    },
  };

  const aggs = {
    categories: {
      categorize_text: { field, size: CATEGORY_LIMIT },
      aggs: { hit: exampleHitAgg, ...sparklineAgg },
    },
  };

  const body = { query, aggs: wrap(aggs) };

  return { params: { index, size: 0, body } };
}
79 changes: 79 additions & 0 deletions
79
x-pack/plugins/aiops/common/api/log_categorization/process_category_results.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
/* | ||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
* or more contributor license agreements. Licensed under the Elastic License | ||
* 2.0; you may not use this file except in compliance with the Elastic License | ||
* 2.0. | ||
*/ | ||
|
||
import { get } from 'lodash'; | ||
|
||
import { estypes } from '@elastic/elasticsearch'; | ||
|
||
import { createRandomSamplerWrapper } from '@kbn/ml-random-sampler-utils'; | ||
|
||
/** One bucket of a per-category date histogram. */
interface SparklineBucket {
  doc_count: number;
  key: number;
  key_as_string: string;
}

/** A category as returned by `processCategoryResults`. */
export interface Category {
  key: string;
  count: number;
  examples: string[];
  sparkline?: SparklineBucket[];
}

/** One bucket of the `categories` aggregation. */
interface CategoryBucket {
  key: string;
  doc_count: number;
  hit: { hits: { hits: Array<{ _source: { message: string } }> } };
  sparkline: {
    buckets: SparklineBucket[];
  };
}

/** Aggregation result holding the category buckets. */
export interface CategoriesAgg {
  categories: {
    buckets: CategoryBucket[];
  };
}

/** The same aggregation result nested under a `sample` key. */
interface CategoriesSampleAgg {
  sample: CategoriesAgg;
}

/** Search response wrapper whose aggregations are either of the shapes above. */
export interface CatResponse {
  rawResponse: estypes.SearchResponseBody<unknown, CategoriesAgg | CategoriesSampleAgg>;
}

/** Category key -> (numeric bucket key -> document count). */
export type SparkLinesPerCategory = Record<string, Record<number, number>>;
|
||
/**
 * Convert a categorization search response into categories and sparklines.
 *
 * The response aggregations are passed through `unwrap` and the buckets of
 * the resulting `categories` aggregation are read. For every bucket:
 * - a category with the bucket's key, its `doc_count` and the value of
 *   `field` from each top hit's `_source` is produced;
 * - a sparkline map (bucket key -> `doc_count`) is stored under the category
 *   key; it is empty when the bucket carries no `sparkline`.
 *
 * @param result the search response wrapper
 * @param field path read from each hit's `_source` to build the examples
 * @param unwrap random sampler unwrapper applied to the aggregations
 * @returns `{ categories, sparkLinesPerCategory }`
 * @throws Error if the response contains no aggregations
 */
export function processCategoryResults(
  result: CatResponse,
  field: string,
  unwrap: ReturnType<typeof createRandomSamplerWrapper>['unwrap']
) {
  const rawAggregations = result.rawResponse.aggregations;
  if (rawAggregations === undefined) {
    throw new Error('processCategoryResults failed, did not return aggregations.');
  }

  const unwrapped = unwrap(
    rawAggregations as unknown as Record<string, estypes.AggregationsAggregate>
  ) as CategoriesAgg;

  const categories: Category[] = [];
  const sparkLinesPerCategory: SparkLinesPerCategory = {};

  for (const bucket of unwrapped.categories.buckets) {
    // Collect the histogram counts keyed by bucket key; stays empty without a sparkline.
    const countsByTime: Record<number, number> = {};
    if (bucket.sparkline !== undefined) {
      for (const point of bucket.sparkline.buckets) {
        countsByTime[point.key] = point.doc_count;
      }
    }
    sparkLinesPerCategory[bucket.key] = countsByTime;

    categories.push({
      key: bucket.key,
      count: bucket.doc_count,
      examples: bucket.hit.hits.hits.map((hit) => get(hit._source, field)),
    });
  }

  return { categories, sparkLinesPerCategory };
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.