diff --git a/src/plugins/data/common/search/aggs/agg_configs.test.ts b/src/plugins/data/common/search/aggs/agg_configs.test.ts index cd0495a3f78c66..c3b7ffe937b35f 100644 --- a/src/plugins/data/common/search/aggs/agg_configs.test.ts +++ b/src/plugins/data/common/search/aggs/agg_configs.test.ts @@ -16,6 +16,15 @@ import type { DataView } from '@kbn/data-views-plugin/common'; import { stubIndexPattern } from '../../stubs'; import { IEsSearchResponse } from '..'; +// Mute moment.tz warnings about not finding a mock timezone +jest.mock('../utils', () => { + const original = jest.requireActual('../utils'); + return { + ...original, + getUserTimeZone: jest.fn(() => 'US/Pacific'), + }; +}); + describe('AggConfigs', () => { const indexPattern: DataView = stubIndexPattern; let typesRegistry: AggTypesRegistryStart; @@ -563,6 +572,82 @@ describe('AggConfigs', () => { '1-bucket>_count' ); }); + + it('prepends a sampling agg whenever sampling is enabled', () => { + const configStates = [ + { + enabled: true, + id: '1', + type: 'avg_bucket', + schema: 'metric', + params: { + customBucket: { + id: '1-bucket', + type: 'date_histogram', + schema: 'bucketAgg', + params: { + field: '@timestamp', + interval: '10s', + }, + }, + customMetric: { + id: '1-metric', + type: 'count', + schema: 'metricAgg', + params: {}, + }, + }, + }, + { + enabled: true, + id: '2', + type: 'terms', + schema: 'bucket', + params: { + field: 'clientip', + }, + }, + { + enabled: true, + id: '3', + type: 'terms', + schema: 'bucket', + params: { + field: 'machine.os.raw', + }, + }, + ]; + + const ac = new AggConfigs( + indexPattern, + configStates, + { typesRegistry, hierarchical: true, probability: 0.5 }, + jest.fn() + ); + const topLevelDsl = ac.toDsl(); + + expect(Object.keys(topLevelDsl)).toContain('sampling'); + expect(Object.keys(topLevelDsl.sampling)).toEqual(['random_sampler', 'aggs']); + expect(Object.keys(topLevelDsl.sampling.aggs)).toContain('2'); + expect(Object.keys(topLevelDsl.sampling.aggs['2'].aggs)).toEqual(['1', '3', '1-bucket']); + }); + + it('should not prepend a sampling agg when no nested agg is avaialble', () => { + const ac = new AggConfigs( + indexPattern, + [ + { + enabled: true, + type: 'count', + schema: 'metric', + }, + ], + { typesRegistry, probability: 0.5 }, + jest.fn() + ); + const topLevelDsl = ac.toDsl(); + expect(Object.keys(topLevelDsl)).not.toContain('sampling'); + }); }); describe('#postFlightTransform', () => { @@ -854,4 +939,74 @@ describe('AggConfigs', () => { `); }); }); + + describe('isSamplingEnabled', () => { + it('should return false if probability is 1', () => { + const ac = new AggConfigs( + indexPattern, + [{ enabled: true, type: 'avg', schema: 'metric', params: { field: 'bytes' } }], + { typesRegistry, probability: 1 }, + jest.fn() + ); + + expect(ac.isSamplingEnabled()).toBeFalsy(); + }); + + it('should return true if probability is less than 1', () => { + const ac = new AggConfigs( + indexPattern, + [{ enabled: true, type: 'avg', schema: 'metric', params: { field: 'bytes' } }], + { typesRegistry, probability: 0.1 }, + jest.fn() + ); + + expect(ac.isSamplingEnabled()).toBeTruthy(); + }); + + it('should return false when all aggs have hasNoDsl flag enabled', () => { + const ac = new AggConfigs( + indexPattern, + [ + { + enabled: true, + type: 'count', + schema: 'metric', + }, + ], + { typesRegistry, probability: 1 }, + jest.fn() + ); + + expect(ac.isSamplingEnabled()).toBeFalsy(); + }); + + it('should return false when no nested aggs are avaialble', () => { + const ac = new AggConfigs( + indexPattern, + [{ enabled: false, type: 'avg', schema: 'metric', params: { field: 'bytes' } }], + { typesRegistry, probability: 1 }, + jest.fn() + ); + + expect(ac.isSamplingEnabled()).toBeFalsy(); + }); + + it('should return true if at least one nested agg is available and probability < 1', () => { + const ac = new AggConfigs( + indexPattern, + [ + { + enabled: true, + type: 'count', + schema: 'metric', + }, + { enabled: true, type: 'avg', schema: 'metric', params: { field: 'bytes' } }, + ], + { typesRegistry, probability: 0.1 }, + jest.fn() + ); + + expect(ac.isSamplingEnabled()).toBeTruthy(); + }); + }); }); diff --git a/src/plugins/data/common/search/aggs/buckets/_terms_other_bucket_helper.test.ts b/src/plugins/data/common/search/aggs/buckets/_terms_other_bucket_helper.test.ts index 6ce588b98fa9c5..11f44ec2bbd1e7 100644 --- a/src/plugins/data/common/search/aggs/buckets/_terms_other_bucket_helper.test.ts +++ b/src/plugins/data/common/search/aggs/buckets/_terms_other_bucket_helper.test.ts @@ -18,6 +18,7 @@ import { AggConfigs, CreateAggConfigParams } from '../agg_configs'; import { BUCKET_TYPES } from './bucket_agg_types'; import { IBucketAggConfig } from './bucket_agg_type'; import { mockAggTypesRegistry } from '../test_helpers'; +import { estypes } from '@elastic/elasticsearch'; const indexPattern = { id: '1234', @@ -281,71 +282,56 @@ const nestedOtherResponse = { describe('Terms Agg Other bucket helper', () => { const typesRegistry = mockAggTypesRegistry(); - const getAggConfigs = (aggs: CreateAggConfigParams[] = []) => { - return new AggConfigs(indexPattern, [...aggs], { typesRegistry }, jest.fn()); - }; - - describe('buildOtherBucketAgg', () => { - test('returns a function', () => { - const aggConfigs = getAggConfigs(singleTerm.aggs); - const agg = buildOtherBucketAgg( - aggConfigs, - aggConfigs.aggs[0] as IBucketAggConfig, - singleTermResponse - ); - expect(typeof agg).toBe('function'); - }); - - test('correctly builds query with single terms agg', () => { - const aggConfigs = getAggConfigs(singleTerm.aggs); - const agg = buildOtherBucketAgg( - aggConfigs, - aggConfigs.aggs[0] as IBucketAggConfig, - singleTermResponse - ); - const expectedResponse = { - aggs: undefined, - filters: { - filters: { - '': { - bool: { - must: [], - filter: [{ exists: { field: 'machine.os.raw' } }], - should: [], - must_not: [ - { match_phrase: { 'machine.os.raw': 'ios' } }, - { match_phrase: { 'machine.os.raw': 'win xp' } }, - ], - }, - }, + for (const probability of [1, 0.5]) { + function enrichResponseWithSampling(response: any) { + if (probability === 1) { + return response; + } + return { + ...response, + aggregations: { + sampling: { + ...response.aggregations, }, }, }; - expect(agg).toBeDefined(); - if (agg) { - expect(agg()['other-filter']).toEqual(expectedResponse); - } - }); + } + function getAggConfigs(aggs: CreateAggConfigParams[] = []) { + return new AggConfigs(indexPattern, [...aggs], { typesRegistry, probability }, jest.fn()); + } + + function getTopAggregations(updatedResponse: estypes.SearchResponse) { + return probability === 1 + ? updatedResponse.aggregations! + : (updatedResponse.aggregations!.sampling as Record); + } + + describe(`buildOtherBucketAgg${probability === 1 ? '- with sampling' : ''}`, () => { + test('returns a function', () => { + const aggConfigs = getAggConfigs(singleTerm.aggs); + const agg = buildOtherBucketAgg( + aggConfigs, + aggConfigs.aggs[0] as IBucketAggConfig, + enrichResponseWithSampling(singleTermResponse) + ); + expect(typeof agg).toBe('function'); + }); - test('correctly builds query for nested terms agg', () => { - const aggConfigs = getAggConfigs(nestedTerm.aggs); - const agg = buildOtherBucketAgg( - aggConfigs, - aggConfigs.aggs[1] as IBucketAggConfig, - nestedTermResponse - ); - const expectedResponse = { - 'other-filter': { + test('correctly builds query with single terms agg', () => { + const aggConfigs = getAggConfigs(singleTerm.aggs); + const agg = buildOtherBucketAgg( + aggConfigs, + aggConfigs.aggs[0] as IBucketAggConfig, + enrichResponseWithSampling(singleTermResponse) + ); + const expectedResponse = { aggs: undefined, filters: { filters: { - [`${SEP}IN-with-dash`]: { + '': { bool: { must: [], - filter: [ - { match_phrase: { 'geo.src': 'IN-with-dash' } }, - { exists: { field: 'machine.os.raw' } }, - ], + filter: [{ exists: { field: 'machine.os.raw' } }], should: [], must_not: [ { match_phrase: { 'machine.os.raw': 'ios' } }, @@ -353,272 +339,322 @@ describe('Terms Agg Other bucket helper', () => { ], }, }, - [`${SEP}US-with-dash`]: { - bool: { - must: [], - filter: [ - { match_phrase: { 'geo.src': 'US-with-dash' } }, - { exists: { field: 'machine.os.raw' } }, - ], - should: [], - must_not: [ - { match_phrase: { 'machine.os.raw': 'ios' } }, - { match_phrase: { 'machine.os.raw': 'win xp' } }, - ], + }, + }, + }; + expect(agg).toBeDefined(); + if (agg) { + const resp = agg(); + const topAgg = probability === 1 ? resp : resp.sampling!.aggs; + expect(topAgg['other-filter']).toEqual(expectedResponse); + } + }); + + test('correctly builds query for nested terms agg', () => { + const aggConfigs = getAggConfigs(nestedTerm.aggs); + const agg = buildOtherBucketAgg( + aggConfigs, + aggConfigs.aggs[1] as IBucketAggConfig, + enrichResponseWithSampling(nestedTermResponse) + ); + const expectedResponse = { + 'other-filter': { + aggs: undefined, + filters: { + filters: { + [`${SEP}IN-with-dash`]: { + bool: { + must: [], + filter: [ + { match_phrase: { 'geo.src': 'IN-with-dash' } }, + { exists: { field: 'machine.os.raw' } }, + ], + should: [], + must_not: [ + { match_phrase: { 'machine.os.raw': 'ios' } }, + { match_phrase: { 'machine.os.raw': 'win xp' } }, + ], + }, + }, + [`${SEP}US-with-dash`]: { + bool: { + must: [], + filter: [ + { match_phrase: { 'geo.src': 'US-with-dash' } }, + { exists: { field: 'machine.os.raw' } }, + ], + should: [], + must_not: [ + { match_phrase: { 'machine.os.raw': 'ios' } }, + { match_phrase: { 'machine.os.raw': 'win xp' } }, + ], + }, }, }, }, }, - }, - }; - expect(agg).toBeDefined(); - if (agg) { - expect(agg()).toEqual(expectedResponse); - } - }); + }; + expect(agg).toBeDefined(); + if (agg) { + const resp = agg(); + const topAgg = probability === 1 ? resp : resp.sampling!.aggs; + // console.log({ probability }, JSON.stringify(topAgg, null, 2)); + expect(topAgg).toEqual(expectedResponse); + } + }); - test('correctly builds query for nested terms agg with one disabled', () => { - const oneDisabledNestedTerms = { - aggs: [ - { - id: '2', - type: BUCKET_TYPES.TERMS, - enabled: false, - params: { - field: { - name: 'machine.os.raw', - indexPattern, - filterable: true, + test('correctly builds query for nested terms agg with one disabled', () => { + const oneDisabledNestedTerms = { + aggs: [ + { + id: '2', + type: BUCKET_TYPES.TERMS, + enabled: false, + params: { + field: { + name: 'machine.os.raw', + indexPattern, + filterable: true, + }, + size: 2, + otherBucket: false, + missingBucket: true, }, - size: 2, - otherBucket: false, - missingBucket: true, }, - }, - { - id: '1', - type: BUCKET_TYPES.TERMS, - params: { - field: { - name: 'geo.src', - indexPattern, - filterable: true, + { + id: '1', + type: BUCKET_TYPES.TERMS, + params: { + field: { + name: 'geo.src', + indexPattern, + filterable: true, + }, + size: 2, + otherBucket: true, + missingBucket: false, }, - size: 2, - otherBucket: true, - missingBucket: false, }, - }, - ], - }; - const aggConfigs = getAggConfigs(oneDisabledNestedTerms.aggs); - const agg = buildOtherBucketAgg( - aggConfigs, - aggConfigs.aggs[1] as IBucketAggConfig, - singleTermResponse - ); - const expectedResponse = { - 'other-filter': { - aggs: undefined, - filters: { + ], + }; + const aggConfigs = getAggConfigs(oneDisabledNestedTerms.aggs); + const agg = buildOtherBucketAgg( + aggConfigs, + aggConfigs.aggs[1] as IBucketAggConfig, + enrichResponseWithSampling(singleTermResponse) + ); + const expectedResponse = { + 'other-filter': { + aggs: undefined, filters: { - '': { - bool: { - filter: [ - { - exists: { - field: 'geo.src', + filters: { + '': { + bool: { + filter: [ + { + exists: { + field: 'geo.src', + }, }, - }, - ], - must: [], - must_not: [ - { - match_phrase: { - 'geo.src': 'ios', + ], + must: [], + must_not: [ + { + match_phrase: { + 'geo.src': 'ios', + }, }, - }, - { - match_phrase: { - 'geo.src': 'win xp', + { + match_phrase: { + 'geo.src': 'win xp', + }, }, - }, - ], - should: [], + ], + should: [], + }, }, }, }, }, - }, - }; - expect(agg).toBeDefined(); - if (agg) { - expect(agg()).toEqual(expectedResponse); - } - }); + }; + expect(agg).toBeDefined(); + if (agg) { + const resp = agg(); + const topAgg = probability === 1 ? resp : resp.sampling!.aggs; + expect(topAgg).toEqual(expectedResponse); + } + }); - test('does not build query if sum_other_doc_count is 0 (exhaustive terms)', () => { - const aggConfigs = getAggConfigs(nestedTerm.aggs); - expect( - buildOtherBucketAgg( + test('does not build query if sum_other_doc_count is 0 (exhaustive terms)', () => { + const aggConfigs = getAggConfigs(nestedTerm.aggs); + expect( + buildOtherBucketAgg( + aggConfigs, + aggConfigs.aggs[1] as IBucketAggConfig, + enrichResponseWithSampling(exhaustiveNestedTermResponse) + ) + ).toBeFalsy(); + }); + + test('excludes exists filter for scripted fields', () => { + const aggConfigs = getAggConfigs(nestedTerm.aggs); + aggConfigs.aggs[1].params.field = { + ...aggConfigs.aggs[1].params.field, + scripted: true, + }; + const agg = buildOtherBucketAgg( aggConfigs, aggConfigs.aggs[1] as IBucketAggConfig, - exhaustiveNestedTermResponse - ) - ).toBeFalsy(); - }); - - test('excludes exists filter for scripted fields', () => { - const aggConfigs = getAggConfigs(nestedTerm.aggs); - aggConfigs.aggs[1].params.field.scripted = true; - const agg = buildOtherBucketAgg( - aggConfigs, - aggConfigs.aggs[1] as IBucketAggConfig, - nestedTermResponse - ); - const expectedResponse = { - 'other-filter': { - aggs: undefined, - filters: { + enrichResponseWithSampling(nestedTermResponse) + ); + const expectedResponse = { + 'other-filter': { + aggs: undefined, filters: { - [`${SEP}IN-with-dash`]: { - bool: { - must: [], - filter: [{ match_phrase: { 'geo.src': 'IN-with-dash' } }], - should: [], - must_not: [ - { - script: { + filters: { + [`${SEP}IN-with-dash`]: { + bool: { + must: [], + filter: [{ match_phrase: { 'geo.src': 'IN-with-dash' } }], + should: [], + must_not: [ + { script: { - lang: undefined, - params: { value: 'ios' }, - source: '(undefined) == value', + script: { + lang: undefined, + params: { value: 'ios' }, + source: '(undefined) == value', + }, }, }, - }, - { - script: { + { script: { - lang: undefined, - params: { value: 'win xp' }, - source: '(undefined) == value', + script: { + lang: undefined, + params: { value: 'win xp' }, + source: '(undefined) == value', + }, }, }, - }, - ], + ], + }, }, - }, - [`${SEP}US-with-dash`]: { - bool: { - must: [], - filter: [{ match_phrase: { 'geo.src': 'US-with-dash' } }], - should: [], - must_not: [ - { - script: { + [`${SEP}US-with-dash`]: { + bool: { + must: [], + filter: [{ match_phrase: { 'geo.src': 'US-with-dash' } }], + should: [], + must_not: [ + { script: { - lang: undefined, - params: { value: 'ios' }, - source: '(undefined) == value', + script: { + lang: undefined, + params: { value: 'ios' }, + source: '(undefined) == value', + }, }, }, - }, - { - script: { + { script: { - lang: undefined, - params: { value: 'win xp' }, - source: '(undefined) == value', + script: { + lang: undefined, + params: { value: 'win xp' }, + source: '(undefined) == value', + }, }, }, - }, - ], + ], + }, }, }, }, }, - }, - }; - expect(agg).toBeDefined(); - if (agg) { - expect(agg()).toEqual(expectedResponse); - } - }); + }; + expect(agg).toBeDefined(); + if (agg) { + const resp = agg(); + const topAgg = probability === 1 ? resp : resp.sampling!.aggs; + expect(topAgg).toEqual(expectedResponse); + } + }); - test('returns false when nested terms agg has no buckets', () => { - const aggConfigs = getAggConfigs(nestedTerm.aggs); - const agg = buildOtherBucketAgg( - aggConfigs, - aggConfigs.aggs[1] as IBucketAggConfig, - nestedTermResponseNoResults - ); + test('returns false when nested terms agg has no buckets', () => { + const aggConfigs = getAggConfigs(nestedTerm.aggs); + const agg = buildOtherBucketAgg( + aggConfigs, + aggConfigs.aggs[1] as IBucketAggConfig, + enrichResponseWithSampling(nestedTermResponseNoResults) + ); - expect(agg).toEqual(false); + expect(agg).toEqual(false); + }); }); - }); - describe('mergeOtherBucketAggResponse', () => { - test('correctly merges other bucket with single terms agg', () => { - const aggConfigs = getAggConfigs(singleTerm.aggs); - const otherAggConfig = buildOtherBucketAgg( - aggConfigs, - aggConfigs.aggs[0] as IBucketAggConfig, - singleTermResponse - ); - - expect(otherAggConfig).toBeDefined(); - if (otherAggConfig) { - const mergedResponse = mergeOtherBucketAggResponse( + describe(`mergeOtherBucketAggResponse${probability === 1 ? '- with sampling' : ''}`, () => { + test('correctly merges other bucket with single terms agg', () => { + const aggConfigs = getAggConfigs(singleTerm.aggs); + const otherAggConfig = buildOtherBucketAgg( aggConfigs, - singleTermResponse, - singleOtherResponse, aggConfigs.aggs[0] as IBucketAggConfig, - otherAggConfig(), - constructSingleTermOtherFilter + enrichResponseWithSampling(singleTermResponse) ); - expect((mergedResponse!.aggregations!['1'] as any).buckets[3].key).toEqual('__other__'); - } - }); - test('correctly merges other bucket with nested terms agg', () => { - const aggConfigs = getAggConfigs(nestedTerm.aggs); - const otherAggConfig = buildOtherBucketAgg( - aggConfigs, - aggConfigs.aggs[1] as IBucketAggConfig, - nestedTermResponse - ); + expect(otherAggConfig).toBeDefined(); + if (otherAggConfig) { + const mergedResponse = mergeOtherBucketAggResponse( + aggConfigs, + enrichResponseWithSampling(singleTermResponse), + enrichResponseWithSampling(singleOtherResponse), + aggConfigs.aggs[0] as IBucketAggConfig, + otherAggConfig(), + constructSingleTermOtherFilter + ); - expect(otherAggConfig).toBeDefined(); - if (otherAggConfig) { - const mergedResponse = mergeOtherBucketAggResponse( + const topAgg = getTopAggregations(mergedResponse); + expect((topAgg['1'] as any).buckets[3].key).toEqual('__other__'); + } + }); + + test('correctly merges other bucket with nested terms agg', () => { + const aggConfigs = getAggConfigs(nestedTerm.aggs); + const otherAggConfig = buildOtherBucketAgg( aggConfigs, - nestedTermResponse, - nestedOtherResponse, aggConfigs.aggs[1] as IBucketAggConfig, - otherAggConfig(), - constructSingleTermOtherFilter + enrichResponseWithSampling(nestedTermResponse) ); - expect((mergedResponse!.aggregations!['1'] as any).buckets[1]['2'].buckets[3].key).toEqual( - '__other__' - ); - } + expect(otherAggConfig).toBeDefined(); + if (otherAggConfig) { + const mergedResponse = mergeOtherBucketAggResponse( + aggConfigs, + enrichResponseWithSampling(nestedTermResponse), + enrichResponseWithSampling(nestedOtherResponse), + aggConfigs.aggs[1] as IBucketAggConfig, + otherAggConfig(), + constructSingleTermOtherFilter + ); + + const topAgg = getTopAggregations(mergedResponse); + expect((topAgg['1'] as any).buckets[1]['2'].buckets[3].key).toEqual('__other__'); + } + }); }); - }); - describe('updateMissingBucket', () => { - test('correctly updates missing bucket key', () => { - const aggConfigs = getAggConfigs(nestedTerm.aggs); - const updatedResponse = updateMissingBucket( - singleTermResponse, - aggConfigs, - aggConfigs.aggs[0] as IBucketAggConfig - ); - expect( - (updatedResponse!.aggregations!['1'] as any).buckets.find( - (bucket: Record) => bucket.key === '__missing__' - ) - ).toBeDefined(); + describe(`updateMissingBucket${probability === 1 ? '- with sampling' : ''}`, () => { + test('correctly updates missing bucket key', () => { + const aggConfigs = getAggConfigs(nestedTerm.aggs); + const updatedResponse = updateMissingBucket( + enrichResponseWithSampling(singleTermResponse), + aggConfigs, + aggConfigs.aggs[0] as IBucketAggConfig + ); + const topAgg = getTopAggregations(updatedResponse); + expect( + (topAgg['1'] as any).buckets.find( + (bucket: Record) => bucket.key === '__missing__' + ) + ).toBeDefined(); + }); }); - }); + } }); diff --git a/src/plugins/data/common/search/aggs/buckets/_terms_other_bucket_helper.ts b/src/plugins/data/common/search/aggs/buckets/_terms_other_bucket_helper.ts index 563436f56a4b38..68c64f67ef27f7 100644 --- a/src/plugins/data/common/search/aggs/buckets/_terms_other_bucket_helper.ts +++ b/src/plugins/data/common/search/aggs/buckets/_terms_other_bucket_helper.ts @@ -130,10 +130,7 @@ const getOtherAggTerms = (requestAgg: Record, key: string, otherAgg }; /** - * - * @param requestAgg - * @param aggConfigs - * @returns + * Helper function to handle sampling case and get the correct cursor agg from a request object */ const getCorrectAggCursorFromRequest = ( requestAgg: Record, @@ -142,6 +139,9 @@ const getCorrectAggCursorFromRequest = ( return aggConfigs.isSamplingEnabled() ? requestAgg.sampling.aggs : requestAgg; }; +/** + * Helper function to handle sampling case and get the correct cursor agg from a response object + */ const getCorrectAggregationsCursorFromResponse = ( response: estypes.SearchResponse, aggConfigs: IAggConfigs