Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Lens] Random sampling feature #143221

Merged
merged 28 commits into from
Oct 24, 2022
Merged
Show file tree
Hide file tree
Changes from 24 commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
be740b6
:sparkles: First pass with UI for random sampling
dej611 Oct 12, 2022
dc16774
Merge remote-tracking branch 'upstream/main' into feature/142958
dej611 Oct 12, 2022
b45c6de
:sparkles: Initial working version
dej611 Oct 12, 2022
1448a22
:fire: Remove unused stuff
dej611 Oct 13, 2022
7d05809
:wrench: Refactor layer settings panel
dej611 Oct 13, 2022
c341c02
:bug: Fix terms other query and some refactor
dej611 Oct 14, 2022
284e729
Merge remote-tracking branch 'upstream/main' into feature/142958
dej611 Oct 14, 2022
1360949
:label: Fix types issues
dej611 Oct 17, 2022
234e4a8
Merge remote-tracking branch 'upstream/main' into feature/142958
dej611 Oct 17, 2022
f7b9dd5
:bug: Fix sampling for other terms agg
dej611 Oct 17, 2022
862931b
:bug: Fix issue with count operation
dej611 Oct 17, 2022
5eb26e0
:white_check_mark: Fix jest tests
dej611 Oct 17, 2022
54c8fc8
:bug: fix test stability
dej611 Oct 17, 2022
3039c5d
:bug: fix test with newer params
dej611 Oct 17, 2022
8966788
:lipstick: Add tech preview label
dej611 Oct 18, 2022
a627ba0
:white_check_mark: Add new tests for sampling
dej611 Oct 18, 2022
8131b5e
Merge remote-tracking branch 'upstream/main' into feature/142958
dej611 Oct 18, 2022
281b8c9
:white_check_mark: Add more tests
dej611 Oct 18, 2022
13c8078
:white_check_mark: Add new test for suggestions
dej611 Oct 19, 2022
4f63f67
Merge remote-tracking branch 'upstream/main' into feature/142958
dej611 Oct 19, 2022
f9e1bb9
:white_check_mark: Add functional tests for layer actions and random …
dej611 Oct 19, 2022
bbc0fd2
Merge branch 'main' into feature/142958
dej611 Oct 19, 2022
f65125d
Merge branch 'main' into feature/142958
flash1293 Oct 19, 2022
165fe55
Merge branch 'main' into feature/142958
flash1293 Oct 20, 2022
60a4580
Merge remote-tracking branch 'upstream/main' into feature/142958
dej611 Oct 24, 2022
e71845e
Update x-pack/plugins/lens/public/datasources/form_based/layer_settin…
dej611 Oct 24, 2022
c4a2b12
Merge branch 'feature/142958' of github.com:dej611/kibana into featur…
dej611 Oct 24, 2022
45eb2f9
:ok_hand: Integrated design feedback
dej611 Oct 24, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
155 changes: 155 additions & 0 deletions src/plugins/data/common/search/aggs/agg_configs.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,15 @@ import type { DataView } from '@kbn/data-views-plugin/common';
import { stubIndexPattern } from '../../stubs';
import { IEsSearchResponse } from '..';

// Mute moment.tz warnings about not finding a mock timezone
jest.mock('../utils', () => {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Debugging test failures was really hard with all warning from moment.tz. This fixed the issue assigning a fixed (supported) timezone for moment.

const original = jest.requireActual('../utils');
return {
...original,
getUserTimeZone: jest.fn(() => 'US/Pacific'),
};
});

describe('AggConfigs', () => {
const indexPattern: DataView = stubIndexPattern;
let typesRegistry: AggTypesRegistryStart;
Expand Down Expand Up @@ -563,6 +572,82 @@ describe('AggConfigs', () => {
'1-bucket>_count'
);
});

it('prepends a sampling agg whenever sampling is enabled', () => {
const configStates = [
{
enabled: true,
id: '1',
type: 'avg_bucket',
schema: 'metric',
params: {
customBucket: {
id: '1-bucket',
type: 'date_histogram',
schema: 'bucketAgg',
params: {
field: '@timestamp',
interval: '10s',
},
},
customMetric: {
id: '1-metric',
type: 'count',
schema: 'metricAgg',
params: {},
},
},
},
{
enabled: true,
id: '2',
type: 'terms',
schema: 'bucket',
params: {
field: 'clientip',
},
},
{
enabled: true,
id: '3',
type: 'terms',
schema: 'bucket',
params: {
field: 'machine.os.raw',
},
},
];

const ac = new AggConfigs(
indexPattern,
configStates,
{ typesRegistry, hierarchical: true, probability: 0.5 },
jest.fn()
);
const topLevelDsl = ac.toDsl();

expect(Object.keys(topLevelDsl)).toContain('sampling');
expect(Object.keys(topLevelDsl.sampling)).toEqual(['random_sampler', 'aggs']);
expect(Object.keys(topLevelDsl.sampling.aggs)).toContain('2');
expect(Object.keys(topLevelDsl.sampling.aggs['2'].aggs)).toEqual(['1', '3', '1-bucket']);
});

it('should not prepend a sampling agg when no nested agg is avaialble', () => {
const ac = new AggConfigs(
indexPattern,
[
{
enabled: true,
type: 'count',
schema: 'metric',
},
],
{ typesRegistry, probability: 0.5 },
jest.fn()
);
const topLevelDsl = ac.toDsl();
expect(Object.keys(topLevelDsl)).not.toContain('sampling');
});
});

describe('#postFlightTransform', () => {
Expand Down Expand Up @@ -854,4 +939,74 @@ describe('AggConfigs', () => {
`);
});
});

describe('isSamplingEnabled', () => {
it('should return false if probability is 1', () => {
const ac = new AggConfigs(
indexPattern,
[{ enabled: true, type: 'avg', schema: 'metric', params: { field: 'bytes' } }],
{ typesRegistry, probability: 1 },
jest.fn()
);

expect(ac.isSamplingEnabled()).toBeFalsy();
});

it('should return true if probability is less than 1', () => {
const ac = new AggConfigs(
indexPattern,
[{ enabled: true, type: 'avg', schema: 'metric', params: { field: 'bytes' } }],
{ typesRegistry, probability: 0.1 },
jest.fn()
);

expect(ac.isSamplingEnabled()).toBeTruthy();
});

it('should return false when all aggs have hasNoDsl flag enabled', () => {
const ac = new AggConfigs(
indexPattern,
[
{
enabled: true,
type: 'count',
schema: 'metric',
},
],
{ typesRegistry, probability: 1 },
jest.fn()
);

expect(ac.isSamplingEnabled()).toBeFalsy();
});

it('should return false when no nested aggs are avaialble', () => {
const ac = new AggConfigs(
indexPattern,
[{ enabled: false, type: 'avg', schema: 'metric', params: { field: 'bytes' } }],
{ typesRegistry, probability: 1 },
jest.fn()
);

expect(ac.isSamplingEnabled()).toBeFalsy();
});

it('should return true if at least one nested agg is available and probability < 1', () => {
const ac = new AggConfigs(
indexPattern,
[
{
enabled: true,
type: 'count',
schema: 'metric',
},
{ enabled: true, type: 'avg', schema: 'metric', params: { field: 'bytes' } },
],
{ typesRegistry, probability: 0.1 },
jest.fn()
);

expect(ac.isSamplingEnabled()).toBeTruthy();
});
});
});
36 changes: 34 additions & 2 deletions src/plugins/data/common/search/aggs/agg_configs.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import { AggTypesDependencies, GetConfigFn, getUserTimeZone } from '../..';
import { getTime, calculateBounds } from '../..';
import type { IBucketAggConfig } from './buckets';
import { insertTimeShiftSplit, mergeTimeShifts } from './utils/time_splits';
import { createSamplerAgg, isSamplingEnabled } from './utils/sampler';

function removeParentAggs(obj: any) {
for (const prop in obj) {
Expand Down Expand Up @@ -55,6 +56,8 @@ export interface AggConfigsOptions {
hierarchical?: boolean;
aggExecutionContext?: AggTypesDependencies['aggExecutionContext'];
partialRows?: boolean;
probability?: number;
samplerSeed?: number;
}

export type CreateAggConfigParams = Assign<AggConfigSerialized, { type: string | IAggType }>;
Expand Down Expand Up @@ -107,6 +110,17 @@ export class AggConfigs {
return this.opts.partialRows ?? false;
}

public get samplerConfig() {
return { probability: this.opts.probability ?? 1, seed: this.opts.samplerSeed };
}

isSamplingEnabled() {
return (
isSamplingEnabled(this.opts.probability) &&
this.getRequestAggs().filter((agg) => !agg.type.hasNoDsl).length > 0
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Trying to follow...
Could you please explain this condition?

this.getRequestAggs().filter((agg) => !agg.type.hasNoDsl).length > 0

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That condition is set to detect a specific case when no sub-agg is available for the sampling, therefore it makes no sense to have it. That's the case of Count of Records in Lens, where it should just return the number of documents.

);
}

setTimeFields(timeFields: string[] | undefined) {
this.timeFields = timeFields;
}
Expand Down Expand Up @@ -225,7 +239,7 @@ export class AggConfigs {
}

toDsl(): Record<string, any> {
const dslTopLvl = {};
const dslTopLvl: Record<string, any> = {};
let dslLvlCursor: Record<string, any>;
let nestedMetrics: Array<{ config: AggConfig; dsl: Record<string, any> }> | [];

Expand Down Expand Up @@ -254,10 +268,21 @@ export class AggConfigs {
(config) => 'splitForTimeShift' in config.type && config.type.splitForTimeShift(config, this)
);

if (this.isSamplingEnabled()) {
dslTopLvl.sampling = createSamplerAgg({
probability: this.opts.probability ?? 1,
seed: this.opts.samplerSeed,
});
}

requestAggs.forEach((config: AggConfig, i: number, list) => {
if (!dslLvlCursor) {
// start at the top level
dslLvlCursor = dslTopLvl;
// when sampling jump directly to the aggs
if (this.isSamplingEnabled()) {
dslLvlCursor = dslLvlCursor.sampling.aggs;
}
} else {
const prevConfig: AggConfig = list[i - 1];
const prevDsl = dslLvlCursor[prevConfig.id];
Expand Down Expand Up @@ -452,7 +477,12 @@ export class AggConfigs {
doc_count: response.rawResponse.hits?.total as estypes.AggregationsAggregate,
};
}
const aggCursor = transformedRawResponse.aggregations!;
const aggCursor = this.isSamplingEnabled()
? (transformedRawResponse.aggregations!.sampling! as Record<
string,
estypes.AggregationsAggregate
>)
: transformedRawResponse.aggregations!;

mergeTimeShifts(this, aggCursor);
return {
Expand Down Expand Up @@ -531,6 +561,8 @@ export class AggConfigs {
metricsAtAllLevels: this.hierarchical,
partialRows: this.partialRows,
aggs: this.aggs.map((agg) => buildExpression(agg.toExpressionAst())),
probability: this.opts.probability,
samplerSeed: this.opts.samplerSeed,
}),
]).toAst();
}
Expand Down
Loading