Skip to content

Commit

Permalink
fix handling of multiple text field names
Browse files Browse the repository at this point in the history
  • Loading branch information
walterra committed Sep 28, 2023
1 parent e1dacc5 commit df03ec3
Show file tree
Hide file tree
Showing 8 changed files with 93 additions and 47 deletions.
3 changes: 2 additions & 1 deletion x-pack/packages/ml/agg_utils/src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -63,14 +63,15 @@ export interface HistogramField {
* aggregation type.
*/
export interface SignificantTerm extends FieldValuePair {
  // Unique identifier for this item. For type 'keyword' this is built as
  // `${fieldName}:${fieldValue}`; for 'log-pattern' it is the category key
  // returned by the categorization aggregation.
  key: string;
  // Discriminant: 'keyword' for significant terms from a terms aggregation,
  // 'log-pattern' for significant categories derived from text fields.
  type: 'keyword' | 'log-pattern';
  // Document count for this term/category within the deviation time range.
  doc_count: number;
  // Background document count for this term/category (baseline time range).
  bg_count: number;
  // Total number of documents in the deviation time range.
  total_doc_count: number;
  // Total number of documents in the baseline time range.
  total_bg_count: number;
  // Raw significance score; normalizedScore is derived from it.
  score: number;
  // p-value of the significance test; null when not available.
  pValue: number | null;
  // Score normalized for display/ranking purposes (derived from `score`).
  normalizedScore: number;
  // Optional per-term histogram data for mini-chart rendering.
  histogram?: SignificantTermHistogramItem[];
  // Optional flag — presumably marks terms unique to one group; TODO confirm.
  unique?: boolean;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import type { SignificantTerm } from '@kbn/ml-agg-utils';

export const significantTerms: SignificantTerm[] = [
{
key: 'user:Peter',
fieldName: 'user',
fieldValue: 'Peter',
doc_count: 1981,
Expand All @@ -21,6 +22,7 @@ export const significantTerms: SignificantTerm[] = [
type: 'keyword',
},
{
key: 'response_code:500',
fieldName: 'response_code',
fieldValue: '500',
doc_count: 1819,
Expand All @@ -33,6 +35,7 @@ export const significantTerms: SignificantTerm[] = [
type: 'keyword',
},
{
key: 'url:home.php',
fieldName: 'url',
fieldValue: 'home.php',
doc_count: 1744,
Expand All @@ -45,6 +48,7 @@ export const significantTerms: SignificantTerm[] = [
type: 'keyword',
},
{
key: 'url:login.php',
fieldName: 'url',
fieldValue: 'login.php',
doc_count: 1738,
Expand Down
1 change: 1 addition & 0 deletions x-pack/plugins/aiops/common/api/stream_reducer.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ describe('streamReducer', () => {
initialState,
addSignificantTermsAction([
{
key: 'the-field-name:the-field-value',
fieldName: 'the-field-name',
fieldValue: 'the-field-value',
doc_count: 10,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import type { GroupTableItem } from '../../components/log_rate_analysis_results_
import { buildExtendedBaseFilterCriteria } from './build_extended_base_filter_criteria';

const selectedSignificantTermMock: SignificantTerm = {
key: 'meta.cloud.instance_id.keyword:1234',
doc_count: 53408,
bg_count: 1154,
fieldName: 'meta.cloud.instance_id.keyword',
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,10 @@ import {
EuiBadge,
EuiBasicTable,
EuiBasicTableColumn,
EuiCode,
EuiIcon,
EuiIconTip,
EuiText,
EuiTableSortingType,
EuiToolTip,
} from '@elastic/eui';
Expand Down Expand Up @@ -143,9 +145,19 @@ export const LogRateAnalysisResultsTable: FC<LogRateAnalysisResultsTableProps> =
name: i18n.translate('xpack.aiops.logRateAnalysis.resultsTable.fieldValueLabel', {
defaultMessage: 'Field value',
}),
render: (_, { fieldValue }) => {
return <div css={cssMultiLineTruncation}>{String(fieldValue)}</div>;
},
render: (_, { fieldValue, type }) => (
<div css={cssMultiLineTruncation}>
{type === 'keyword' ? (
String(fieldValue)
) : (
<EuiText size="xs">
<EuiCode language="log" transparentBackground css={{ paddingInline: '0px' }}>
{fieldValue}
</EuiCode>
</EuiText>
)}
</div>
),
sortable: true,
textOnly: true,
truncateText: false,
Expand Down
3 changes: 1 addition & 2 deletions x-pack/plugins/aiops/server/routes/log_rate_analysis.ts
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,7 @@ export const defineLogRateAnalysisRoute = (
const indexInfo = await fetchIndexInfo(
client,
request.body,
['message'],
['message', 'error.message'],
abortSignal
);

Expand All @@ -238,7 +238,6 @@ export const defineLogRateAnalysisRoute = (
textFieldCandidates.push(...indexInfo.textFieldCandidates);
totalDocCount = indexInfo.totalDocCount;
} catch (e) {
// console.log(e);
if (!isRequestAbortedError(e)) {
logger.error(`Failed to fetch index information, got: \n${e.toString()}`);
pushError(`Failed to fetch index information.`);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,22 @@ export const fetchSignificantCategories = async (
emitError: (m: string) => void,
abortSignal?: AbortSignal
) => {
// To make sure we end up with the same set of categories for both the
// baseline and the deviation time range, we run an initial query that spans
// from the baseline start to the deviation end. We could refactor this to
// query the exact baseline and deviation ranges individually, but for now
// this combined query should be good enough as a starting point.
const categoriesOverall = await fetchCategories(
esClient,
params,
fieldNames,
params.baselineMin,
params.deviationMax,
logger,
sampleProbability,
emitError,
abortSignal
);

const categoriesBaseline = await fetchCategories(
esClient,
params,
Expand All @@ -63,49 +79,60 @@ export const fetchSignificantCategories = async (
abortSignal
);

if (categoriesBaseline.length === 0 || categoriesDeviation.length === 0) return [];

const categoriesBaselineTotalCount = getCategoriesTotalCount(categoriesBaseline[0].categories);
const categoriesBaselineTestData = getCategoriesTestData(categoriesBaseline[0].categories);

const categoriesDeviationTotalCount = getCategoriesTotalCount(categoriesDeviation[0].categories);
const categoriesDeviationTestData = getCategoriesTestData(categoriesDeviation[0].categories);

// Get all unique keys from both arrays
const allKeys: string[] = Array.from(
new Set([
...categoriesBaselineTestData.map((term) => term.key.toString()),
...categoriesDeviationTestData.map((term) => term.key.toString()),
])
).slice(0, 100);
if (
categoriesBaseline.length !== fieldNames.length ||
categoriesDeviation.length !== fieldNames.length
)
return [];

const significantCategories: SignificantTerm[] = [];

allKeys.forEach((key) => {
const baselineTerm = categoriesBaselineTestData.find((term) => term.key === key);
const deviationTerm = categoriesDeviationTestData.find((term) => term.key === key);

const observed: number = deviationTerm?.percentage ?? 0;
const expected: number = baselineTerm?.percentage ?? 0;
const chiSquared = Math.pow(observed - expected, 2) / (expected > 0 ? expected : 1e-6); // Prevent divide by zero

const pValue = criticalTableLookup(chiSquared, 1);
const score = Math.log(pValue);

if (pValue <= LOG_RATE_ANALYSIS_P_VALUE_THRESHOLD && observed > expected) {
significantCategories.push({
fieldName: 'message',
fieldValue: key,
doc_count: deviationTerm?.doc_count ?? 0,
bg_count: baselineTerm?.doc_count ?? 0,
total_doc_count: categoriesDeviationTotalCount,
total_bg_count: categoriesBaselineTotalCount,
score,
pValue,
normalizedScore: getNormalizedScore(score),
type: 'log-pattern',
});
}
fieldNames.forEach((fieldName, i) => {
const categoriesBaselineTotalCount = getCategoriesTotalCount(categoriesBaseline[i].categories);
const categoriesBaselineTestData = getCategoriesTestData(categoriesBaseline[i].categories);

const categoriesDeviationTotalCount = getCategoriesTotalCount(
categoriesDeviation[i].categories
);
const categoriesDeviationTestData = getCategoriesTestData(categoriesDeviation[i].categories);

// Get all unique keys from both arrays
const allKeys: string[] = Array.from(
new Set([
...categoriesBaselineTestData.map((term) => term.key.toString()),
...categoriesDeviationTestData.map((term) => term.key.toString()),
])
).slice(0, 100);

allKeys.forEach((key) => {
const categoryData = categoriesOverall[i].categories.find((c) => c.key === key);

const baselineTerm = categoriesBaselineTestData.find((term) => term.key === key);
const deviationTerm = categoriesDeviationTestData.find((term) => term.key === key);

const observed: number = deviationTerm?.percentage ?? 0;
const expected: number = baselineTerm?.percentage ?? 0;
const chiSquared = Math.pow(observed - expected, 2) / (expected > 0 ? expected : 1e-6); // Prevent divide by zero

const pValue = criticalTableLookup(chiSquared, 1);
const score = Math.log(pValue);

if (pValue <= LOG_RATE_ANALYSIS_P_VALUE_THRESHOLD && observed > expected) {
significantCategories.push({
key,
fieldName,
fieldValue: categoryData?.examples[0] ?? '',
doc_count: deviationTerm?.doc_count ?? 0,
bg_count: baselineTerm?.doc_count ?? 0,
total_doc_count: categoriesDeviationTotalCount,
total_bg_count: categoriesBaselineTotalCount,
score,
pValue,
normalizedScore: getNormalizedScore(score),
type: 'log-pattern',
});
}
});
});

return significantCategories;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,8 @@ export const fetchSignificantTermPValues = async (

if (typeof pValue === 'number' && pValue < LOG_RATE_ANALYSIS_P_VALUE_THRESHOLD) {
result.push({
key: `${fieldName}:${String(bucket.key)}`,
type: 'keyword',
fieldName,
fieldValue: String(bucket.key),
doc_count: bucket.doc_count,
Expand All @@ -179,7 +181,6 @@ export const fetchSignificantTermPValues = async (
score: bucket.score,
pValue,
normalizedScore: getNormalizedScore(bucket.score),
type: 'keyword',
});
}
}
Expand Down

0 comments on commit df03ec3

Please sign in to comment.